Showing preview only (2,395K chars total). Download the full file or copy to clipboard to get everything.
Repository: z1069614715/objectdetection_script
Branch: master
Commit: 02ba8c6fb2ad
Files: 351
Total size: 2.2 MB
Directory structure:
gitextract_1c2iago4/
├── .gitignore
├── Ultralytics-YOLO-project.md
├── bilibili-guide.md
├── cv-attention/
│ ├── A2Attention.py
│ ├── BAM.py
│ ├── Biformer.py
│ ├── CAA.py
│ ├── CBAM.py
│ ├── CPCA.py
│ ├── CloAttention.py
│ ├── CoTAttention.py
│ ├── CoordAttention.py
│ ├── DAttention.py
│ ├── ECA.py
│ ├── ELA.py
│ ├── EMA.py
│ ├── EffectiveSE.py
│ ├── GAM.py
│ ├── GC.py
│ ├── GE.py
│ ├── LSKA.py
│ ├── LSKBlock.py
│ ├── MHSA.py
│ ├── MLCA.py
│ ├── MobileViTAttention.py
│ ├── ParNetAttention.py
│ ├── PolarizedSelfAttention.py
│ ├── S2Attention.py
│ ├── SE.py
│ ├── SGE.py
│ ├── SK.py
│ ├── SequentialSelfAttention.py
│ ├── ShuffleAttention.py
│ ├── SimAM.py
│ ├── TripletAttention.py
│ └── readme.md
├── cvpr2025-deim-project.md
├── damo-yolo/
│ ├── Annotations/
│ │ └── ReadMe.md
│ ├── JPEGImages/
│ │ └── ReadMe.md
│ ├── readme.md
│ └── voc2coco.py
├── data-offline-aug/
│ ├── object_detection_data_aug.py
│ ├── readme.md
│ └── segment_data_aug.py
├── mmdet-course/
│ ├── config/
│ │ ├── atss_r50_fpn_dyhead_1x_visdrone.py
│ │ ├── cascade-rcnn_r50_fpn_1x_visdrone.py
│ │ ├── ddq-detr-4scale_r50_8xb2-12e_visdrone.py
│ │ ├── dino-4scale_r50_8xb2-12e_visdrone.py
│ │ ├── faster-rcnn_r50_fpn_ciou_1x_visdrone.py
│ │ ├── gfl_r50_fpn_1x_visdrone.py
│ │ ├── retinanet_r50_fpn_1x_visdrone.py
│ │ ├── rtmdet_tiny_8xb32-300e_visdrone.py
│ │ ├── tood_r50_fpn_1x_visdrone.py
│ │ └── yolox_tiny_8xb8-300e_visdrone.py
│ ├── mmdet2yolo.py
│ ├── readme.md
│ └── yolo2coco.py
├── module-info/
│ ├── CVPR2023-SMPConv.md
│ ├── CVPR2024-DCMPNet.md
│ ├── CVPR2024-FADC.md
│ ├── CVPR2024-PKINet.md
│ ├── CVPR2024-ParameterNet.md
│ ├── CVPR2024-RMT.md
│ ├── CVPR2024-RepVIT.md
│ ├── CVPR2024-Rewrite the Stars.md
│ ├── CVPR2024-SFSConv.md
│ ├── CVPR2024-TransNext.md
│ ├── CVPR2024-UniRepLKNet.md
│ ├── CVPR2025-BHViT.md
│ ├── CVPR2025-DarkIR.md
│ ├── CVPR2025-EVSSM.md
│ ├── CVPR2025-EfficientViM.md
│ ├── CVPR2025-FDConv.md
│ ├── CVPR2025-GroupMamba.md
│ ├── CVPR2025-LSNet.md
│ ├── CVPR2025-MambaIRV2.md
│ ├── CVPR2025-MambaOut.md
│ ├── CVPR2025-MambaVision.md
│ ├── CVPR2025-MobileMamba.md
│ ├── CVPR2025-Mona.md
│ ├── CVPR2025-OverLoCK.md
│ ├── CVPR2025-SCSegamba.md
│ ├── CVPR2025-Transformers without Normalization.md
│ ├── CVPR2025-vHeat.md
│ ├── ICLR2025-Pola.md
│ ├── ICLR2025-ToST.md
│ └── TPAMI2025-HyperYOLO.md
├── mutilmodel-project.md
├── objectdetection-tricks/
│ ├── readme.md
│ ├── tricks_1.py
│ ├── tricks_10.py
│ ├── tricks_11.py
│ ├── tricks_12.py
│ ├── tricks_13.py
│ ├── tricks_14.py
│ ├── tricks_15.py
│ ├── tricks_16.py
│ ├── tricks_2.py
│ ├── tricks_3.py
│ ├── tricks_4.py
│ ├── tricks_5.py
│ ├── tricks_6.py
│ ├── tricks_7.py
│ ├── tricks_8.py
│ └── tricks_9.py
├── readme.md
├── visdrone2019-benchmark/
│ └── readme.md
├── yolo/
│ ├── data.yaml
│ ├── dataset/
│ │ ├── VOCdevkit/
│ │ │ ├── Annotations/
│ │ │ │ └── ReadMe.md
│ │ │ ├── JPEGImages/
│ │ │ │ └── ReadMe.md
│ │ │ └── txt/
│ │ │ └── ReadMe.md
│ │ ├── split_data.py
│ │ └── xml2txt.py
│ └── readme.md
├── yolo-gradcam/
│ ├── README.md
│ ├── yolov11_heatmap.py
│ ├── yolov5_heatmap.py
│ ├── yolov7_heatmap.py
│ ├── yolov8_heatmap.py
│ └── yolov9_heatmap.py
└── yolo-improve/
├── CAM.py
├── iou.py
├── paper.md
├── readme.md
├── rtdetr-compress.md
├── rtdetr-distill.md
├── rtdetr-project.md
├── ultralytics-yolo/
│ ├── get_COCO_metrice.py
│ ├── heatmap.py
│ ├── requirements.txt
│ ├── train.py
│ ├── val.py
│ └── yolo2coco.py
├── yolov11-project.md
├── yolov5-AIFI.py
├── yolov5-AUX/
│ ├── benchmarks.py
│ ├── data/
│ │ ├── Argoverse.yaml
│ │ ├── GlobalWheat2020.yaml
│ │ ├── ImageNet.yaml
│ │ ├── Objects365.yaml
│ │ ├── SKU-110K.yaml
│ │ ├── VOC.yaml
│ │ ├── VisDrone.yaml
│ │ ├── coco.yaml
│ │ ├── coco128-seg.yaml
│ │ ├── coco128.yaml
│ │ ├── hyps/
│ │ │ ├── hyp.Objects365.yaml
│ │ │ ├── hyp.VOC.yaml
│ │ │ ├── hyp.no-augmentation.yaml
│ │ │ ├── hyp.scratch-high.yaml
│ │ │ ├── hyp.scratch-low.yaml
│ │ │ └── hyp.scratch-med.yaml
│ │ ├── scripts/
│ │ │ ├── download_weights.sh
│ │ │ ├── get_coco.sh
│ │ │ ├── get_coco128.sh
│ │ │ └── get_imagenet.sh
│ │ └── xView.yaml
│ ├── detect.py
│ ├── export.py
│ ├── hubconf.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── common.py
│ │ ├── experimental.py
│ │ ├── hub/
│ │ │ ├── anchors.yaml
│ │ │ ├── yolov3-spp.yaml
│ │ │ ├── yolov3-tiny.yaml
│ │ │ ├── yolov3.yaml
│ │ │ ├── yolov5-bifpn.yaml
│ │ │ ├── yolov5-fpn.yaml
│ │ │ ├── yolov5-p2.yaml
│ │ │ ├── yolov5-p34.yaml
│ │ │ ├── yolov5-p6.yaml
│ │ │ ├── yolov5-p7.yaml
│ │ │ ├── yolov5-panet.yaml
│ │ │ ├── yolov5l6.yaml
│ │ │ ├── yolov5m6.yaml
│ │ │ ├── yolov5n6.yaml
│ │ │ ├── yolov5s-LeakyReLU.yaml
│ │ │ ├── yolov5s-ghost.yaml
│ │ │ ├── yolov5s-transformer.yaml
│ │ │ ├── yolov5s6.yaml
│ │ │ └── yolov5x6.yaml
│ │ ├── segment/
│ │ │ ├── yolov5l-seg.yaml
│ │ │ ├── yolov5m-seg.yaml
│ │ │ ├── yolov5n-seg.yaml
│ │ │ ├── yolov5s-seg.yaml
│ │ │ └── yolov5x-seg.yaml
│ │ ├── tf.py
│ │ ├── yolo.py
│ │ ├── yolov5_aux.yaml
│ │ ├── yolov5l.yaml
│ │ ├── yolov5m.yaml
│ │ ├── yolov5n.yaml
│ │ ├── yolov5s.yaml
│ │ └── yolov5x.yaml
│ ├── train.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── activations.py
│ │ ├── augmentations.py
│ │ ├── autoanchor.py
│ │ ├── autobatch.py
│ │ ├── aws/
│ │ │ ├── __init__.py
│ │ │ ├── mime.sh
│ │ │ ├── resume.py
│ │ │ └── userdata.sh
│ │ ├── callbacks.py
│ │ ├── dataloaders.py
│ │ ├── docker/
│ │ │ ├── Dockerfile
│ │ │ ├── Dockerfile-arm64
│ │ │ └── Dockerfile-cpu
│ │ ├── downloads.py
│ │ ├── flask_rest_api/
│ │ │ ├── README.md
│ │ │ ├── example_request.py
│ │ │ └── restapi.py
│ │ ├── general.py
│ │ ├── google_app_engine/
│ │ │ ├── Dockerfile
│ │ │ ├── additional_requirements.txt
│ │ │ └── app.yaml
│ │ ├── loggers/
│ │ │ ├── __init__.py
│ │ │ ├── clearml/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── clearml_utils.py
│ │ │ │ └── hpo.py
│ │ │ └── comet/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── comet_utils.py
│ │ │ ├── hpo.py
│ │ │ └── optimizer_config.json
│ │ ├── loss.py
│ │ ├── metrics.py
│ │ ├── plots.py
│ │ ├── segment/
│ │ │ ├── __init__.py
│ │ │ ├── augmentations.py
│ │ │ ├── dataloaders.py
│ │ │ ├── general.py
│ │ │ ├── loss.py
│ │ │ ├── metrics.py
│ │ │ └── plots.py
│ │ ├── torch_utils.py
│ │ └── triton.py
│ └── val.py
├── yolov5-C3RFEM.py
├── yolov5-CARAFE.py
├── yolov5-CCFM.py
├── yolov5-ContextAggregation.py
├── yolov5-CoordConv.py
├── yolov5-DBB.py
├── yolov5-DCN.py
├── yolov5-DCNV3/
│ ├── commod.py
│ └── ops_dcnv3/
│ ├── functions/
│ │ ├── __init__.py
│ │ └── dcnv3_func.py
│ ├── make.sh
│ ├── modules/
│ │ ├── __init__.py
│ │ └── dcnv3.py
│ ├── setup.py
│ ├── src/
│ │ ├── cpu/
│ │ │ ├── dcnv3_cpu.cpp
│ │ │ └── dcnv3_cpu.h
│ │ ├── cuda/
│ │ │ ├── dcnv3_cuda.cu
│ │ │ ├── dcnv3_cuda.h
│ │ │ └── dcnv3_im2col_cuda.cuh
│ │ ├── dcnv3.h
│ │ └── vision.cpp
│ └── test.py
├── yolov5-DSConv.py
├── yolov5-DecoupledHead.py
├── yolov5-DySnakeConv.py
├── yolov5-EVC.py
├── yolov5-FasterBlock.py
├── yolov5-GFPN/
│ ├── extra_modules.py
│ └── yolov5_GFPN.yaml
├── yolov5-GOLDYOLO/
│ ├── common.py
│ ├── yolo.py
│ ├── yolov5n-goldyolo.yaml
│ ├── yolov7-goldyolo.yaml
│ └── yolov7-tiny-goldyolo.yaml
├── yolov5-NWD.py
├── yolov5-OTA/
│ └── loss.py
├── yolov5-RepNCSPELAN.py
├── yolov5-SAConv.py
├── yolov5-TSCODE.py
├── yolov5-aLRPLoss.py
├── yolov5-asf.py
├── yolov5-backbone/
│ ├── CVPR2023-EfficientViT/
│ │ └── EfficientViT.py
│ ├── CVPR2024-StarNet/
│ │ └── starnet.py
│ ├── ConvNextV2/
│ │ └── convnextv2.py
│ ├── EMO/
│ │ └── emo.py
│ ├── EfficientFormerV2/
│ │ └── EfficientFormerV2.py
│ ├── EfficientViT/
│ │ └── efficientViT.py
│ ├── FocalNet/
│ │ └── FocalNet.py
│ ├── LSKNet/
│ │ └── lsknet.py
│ ├── MobileNetV4/
│ │ └── mobilenetv4.py
│ ├── NextViT/
│ │ └── NextViT.py
│ ├── ODConv/
│ │ ├── od_mobilenetv2.py
│ │ ├── od_resnet.py
│ │ └── odconv.py
│ ├── ODConvFuse/
│ │ ├── od_mobilenetv2.py
│ │ ├── od_resnet.py
│ │ └── odconv.py
│ ├── PoolFormer/
│ │ └── poolformer.py
│ ├── RIFormer/
│ │ └── RIFormer.py
│ ├── RepViT/
│ │ └── repvit.py
│ ├── SwinTransformer/
│ │ └── SwinTransformer.py
│ ├── UniRepLKNet/
│ │ └── unireplknet.py
│ ├── VanillaNet/
│ │ └── VanillaNet.py
│ ├── fasternet/
│ │ ├── faster_cfg/
│ │ │ ├── fasternet_l.yaml
│ │ │ ├── fasternet_m.yaml
│ │ │ ├── fasternet_s.yaml
│ │ │ ├── fasternet_t0.yaml
│ │ │ ├── fasternet_t1.yaml
│ │ │ └── fasternet_t2.yaml
│ │ └── fasternet.py
│ ├── inceptionnext/
│ │ └── inceptionnext.py
│ ├── main.py
│ ├── yolo.py
│ └── yolov5-custom.yaml
├── yolov5-dyhead.py
├── yolov5-res2block.py
├── yolov5-softnms.py
├── yolov5v7-light.md
├── yolov7-CoordConv.py
├── yolov7-DBB.py
├── yolov7-DCN.py
├── yolov7-DCNV3.py
├── yolov7-DSConv.py
├── yolov7-DecoupledHead.py
├── yolov7-DySnakeConv.py
├── yolov7-EVC.py
├── yolov7-MPDiou.py
├── yolov7-NWD.py
├── yolov7-PConv.py
├── yolov7-RFEM.py
├── yolov7-RepNCSPELAN.py
├── yolov7-SAConv.py
├── yolov7-asf.py
├── yolov7-head/
│ ├── yolov7-tiny-5-heads.yaml
│ ├── yolov7-tiny-P2.yaml
│ └── yolov7-tiny-P6.yaml
├── yolov7-iou.py
├── yolov7-odconv.py
├── yolov7-slimneck.py
├── yolov7-softnms.py
├── yolov8-DCN.py
├── yolov8-compress.md
├── yolov8-distill.md
├── yolov8-erf.py
├── yolov8-objectcount.py
├── yolov8-track.py
├── yolov8.py
├── yolov8v10-project.md
└── yolov9-backbone/
├── yolo.py
└── yolov9-c-custom.yaml
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# Profiling
*.pclprof
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
.idea
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# VSCode project settings
.vscode/
# Rope project settings
.ropeproject
# mkdocs documentation
/site
mkdocs_github_authors.yaml
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# datasets and projects
datasets/
runs/
wandb/
tests/
logs/
.DS_Store
# Neural Network weights -----------------------------------------------------------------------------------------------
weights/
*.weights
*.pt
*.pb
*.onnx
*.engine
*.mlmodel
*.mlpackage
*.torchscript
*.tflite
*.h5
*_saved_model/
*_web_model/
*_openvino_model/
*_paddle_model/
pnnx*
# Autogenerated files for tests
/ultralytics/assets/
# dataset cache
*.cache
================================================
FILE: Ultralytics-YOLO-project.md
================================================
# Ultralytics-YOLO项目详细说明
1. 本项目集成了YOLOv8、v10、v11、v12乃至前沿的YOLO26等全系列基础模型。 无论是做横向对比实验,还是纵向的版本改进,无需到处找资源,一个项目就能满足你所有的实验需求!
2. 核心代码已实现高度模块化与解耦,专为新手优化。 你完全不需要死磕底层复杂代码,只需像搭积木一样简单修改YAML配置文件,就能轻松实现各种改进模块的自由组合。
3. 面对日益内卷的YOLO赛道,简单的“缝合”已难满足毕业要求。 本项目不仅提供现成的创新方案,更配套独家“二次创新”课程,授人以渔。我们将手把手教你掌握模块设计的底层逻辑,助你从“模仿者”进阶为“创造者”,设计出独属于你的创新模块。
4. 针对有代码基础但受困于Ultralytics复杂架构的同学, 本项目引入了来自DFine、DEIM项目中成熟的“万物皆可融”架构思想。你无需纠结模块注册等信息,只需遵循我所提供的标准接口规范,即可将自定义魔改模块无缝融入YAML配置,与各类CSP变种灵活结合。
5. 实验跑通了,却不知道如何写创新点? 本项目将定期拆解高分论文,传授写作心法,教你如何将实验成果转化为逻辑严密、亮点突出的高质量学术论文,解决写作难题!
6. 毕业设计缺少高大上的展示界面? 别担心,项目会内置基于PyQt或HTML的通用可视化界面,开箱即用,完美补齐毕业论文的最后一块拼图,助你从容应对答辩!
7. 购买即享专属技术交流群, 这里有业内公认的高效答疑服务,以及志同道合的伙伴互助交流。拒绝闭门造车,让我们带你避开深坑,高效通关!
## 对于已经入手了yolov8/yolo11项目的同学来说,如果你有以下几点需求,可以考虑追加入手!
1. 想用最新的YOLO26做实验!而且本项目支持v8、v10、v11、v12、26全系列版本!
2. 想深入学习改进创新的同学,本项目会附带二次创新的通用教程,手把手教你设计出属于自己的创新模块!
3. 做完实验不知道怎么写论文?本项目会定期拆解高分论文案例,教你如何把实验结果写成逻辑清晰、亮点突出的高质量学术论文
4. 想自己魔改模块的同学!本项目提供超级简单的模块注册方式,只需按照教程操作,就能轻松注册自己的模块,还能和各种CSP变种随意组合!
## 模块列表(这些模块均已在代码中注册好,只需要修改yaml可以直接实验)
- ultralytics/nn/extra_modules/attention
1. ultralytics/nn/extra_modules/attention/SEAM.py
2. CVPR2021|ultralytics/nn/extra_modules/attention/ca.py
3. ICASSP2023|ultralytics/nn/extra_modules/attention/ema.py
4. ICML2021|ultralytics/nn/extra_modules/attention/simam.py
5. ICCV2023|ultralytics/nn/extra_modules/attention/lsk.py
6. WACV2024|ultralytics/nn/extra_modules/attention/DeformableLKA.py
7. ultralytics/nn/extra_modules/attention/mlca.py
8. BIBM2024|ultralytics/nn/extra_modules/attention/FSA.py
9. AAAI2025|ultralytics/nn/extra_modules/attention/CDFA.py
10. TGRS2025|ultralytics/nn/extra_modules/attention/MCA.py
11. CVPR2025|ultralytics/nn/extra_modules/attention/CASAB.py
12. NN2025|ultralytics/nn/extra_modules/attention/KSFA.py
13. TPAMI2025|ultralytics/nn/extra_modules/attention/GQL.py
14. TGRS2025|ultralytics/nn/extra_modules/attention/ACA.py
15. TGRS2025|ultralytics/nn/extra_modules/attention/DHPF.py
16. TGRS2025|ultralytics/nn/extra_modules/attention/ACAB.py
- ultralytics/nn/extra_modules/conv_module(此部分内容教程可以看GuideVideo-MG.md中的改进模块-使用教程的第五节)
1. CVPR2021|ultralytics/nn/extra_modules/conv_module/dbb.py
2. TIP2024|ultralytics/nn/extra_modules/conv_module/deconv.py
3. ICCV2023|ultralytics/nn/extra_modules/conv_module/dynamic_snake_conv.py
4. CVPR2023|ultralytics/nn/extra_modules/conv_module/pconv.py
5. AAAI2025|ultralytics/nn/extra_modules/conv_module/psconv.py
6. CVPR2025|ultralytics/nn/extra_modules/conv_module/ShiftwiseConv.py
7. ultralytics/nn/extra_modules/conv_module/wdbb.py
8. ultralytics/nn/extra_modules/conv_module/deepdbb.py
9. ECCV2024|ultralytics/nn/extra_modules/conv_module/wtconv2d.py
10. CVPR2023|ultralytics/nn/extra_modules/conv_module/ScConv.py
11. ultralytics/nn/extra_modules/conv_module/dcnv2.py
12. CVPR2024|ultralytics/nn/extra_modules/conv_module/DilatedReparamConv.py
13. ultralytics/nn/extra_modules/conv_module/gConv.py
14. CVPR2024|ultralytics/nn/extra_modules/conv_module/IDWC.py
15. ultralytics/nn/extra_modules/conv_module/DSA.py
16. CVPR2025|ultralytics/nn/extra_modules/conv_module/FDConv.py
17. CVPR2023|ultralytics/nn/extra_modules/conv_module/dcnv3.py
18. CVPR2024|ultralytics/nn/extra_modules/conv_module/dcnv4.py
19. CVPR2024|ultralytics/nn/extra_modules/conv_module/DynamicConv.py
20. CVPR2024|ultralytics/nn/extra_modules/conv_module/FADC.py
21. CVPR2023|ultralytics/nn/extra_modules/conv_module/SMPConv.py
22. MIA2025|ultralytics/nn/extra_modules/conv_module/FourierConv.py
23. CVPR2024|ultralytics/nn/extra_modules/conv_module/SFSConv.py
24. ICCV2025|ultralytics/nn/extra_modules/conv_module/MBRConv.py
25. ICCV2025|ultralytics/nn/extra_modules/conv_module/ConvAttn.py
26. ICCV2025|ultralytics/nn/extra_modules/conv_module/Converse2D.py
27. CVPR2025|ultralytics/nn/extra_modules/conv_module/gcconv.py
28. ACCV2024|ultralytics/nn/extra_modules/conv_module/RMBC.py
29. CVPR2026|ultralytics/nn/extra_modules/conv_module/DEGConv.py
- ultralytics/nn/extra_modules/stem
1. ultralytics/nn/extra_modules/stem/SRFD.py
2. ultralytics/nn/extra_modules/stem/LoG.py
3. ICCV2023|ultralytics/nn/extra_modules/stem/RepStem.py
- ultralytics/nn/extra_modules/upsample
1. CVPR2024|ultralytics/nn/extra_modules/upsample/eucb.py
2. CVPR2024|ultralytics/nn/extra_modules/upsample/eucb_sc.py
3. ultralytics/nn/extra_modules/upsample/WaveletUnPool.py
4. ICCV2019|ultralytics/nn/extra_modules/upsample/CARAFE.py
5. ICCV2023|ultralytics/nn/extra_modules/upsample/DySample.py
6. ICCV2025|ultralytics/nn/extra_modules/upsample/Converse2D_Up.py
7. CVPR2025|ultralytics/nn/extra_modules/upsample/DSUB.py
- ultralytics/nn/extra_modules/downsample
1. TIP2020|ultralytics/nn/extra_modules/downsample/gcnet.py
2. 自研模块|ultralytics/nn/extra_modules/downsample/lawds.py
3. ultralytics/nn/extra_modules/downsample/WaveletPool.py
4. ultralytics/nn/extra_modules/downsample/ADown.py
5. ultralytics/nn/extra_modules/downsample/YOLOV7Down.py
6. ultralytics/nn/extra_modules/downsample/SPDConv.py
7. ultralytics/nn/extra_modules/downsample/HWD.py
8. ultralytics/nn/extra_modules/downsample/DRFD.py
9. TGRS2025|ultralytics/nn/extra_modules/conv_module/FSConv.py
- ultralytics/nn/extra_modules/module
1. AAAI2025|ultralytics/nn/extra_modules/module/APBottleneck.py
2. CVPR2025|ultralytics/nn/extra_modules/module/efficientVIM.py
3. CVPR2023|ultralytics/nn/extra_modules/module/fasterblock.py
4. CVPR2024|ultralytics/nn/extra_modules/module/starblock.py
5. ultralytics/nn/extra_modules/module/DWR.py
6. CVPR2024|ultralytics/nn/extra_modules/module/UniRepLKBlock.py
7. CVPR2025|ultralytics/nn/extra_modules/module/mambaout.py
8. AAAI2024|ultralytics/nn/extra_modules/module/DynamicFilter.py
9. ultralytics/nn/extra_modules/module/StripBlock.py
10. TGRS2024|ultralytics/nn/extra_modules/module/elgca.py
11. CVPR2024|ultralytics/nn/extra_modules/module/LEGM.py
12. ICCV2023|ultralytics/nn/extra_modules/module/iRMB.py
13. TPAMI2025|ultralytics/nn/extra_modules/module/MSBlock.py
14. ICLR2024|ultralytics/nn/extra_modules/module/FATBlock.py
15. CVPR2024|ultralytics/nn/extra_modules/module/MSCB.py
16. ultralytics/nn/extra_modules/module/LEGBlock.py
17. ultralytics/nn/extra_modules/module/GLSA.py
18. CVPR2025|ultralytics/nn/extra_modules/module/RCB.py
19. ECCV2024|ultralytics/nn/extra_modules/module/JDPM.py
20. CVPR2025|ultralytics/nn/extra_modules/module/vHeat.py
21. CVPR2025|ultralytics/nn/extra_modules/module/EBlock.py
22. CVPR2025|ultralytics/nn/extra_modules/module/DBlock.py
23. ECCV2024|ultralytics/nn/extra_modules/module/FMB.py
24. CVPR2024|ultralytics/nn/extra_modules/module/IDWB.py
25. ECCV2022|ultralytics/nn/extra_modules/module/LFE.py
26. AAAI2025|ultralytics/nn/extra_modules/module/FCM.py
27. CVPR2024|ultralytics/nn/extra_modules/module/RepViTBlock.py
28. CVPR2024|ultralytics/nn/extra_modules/module/PKIModule.py
29. CVPR2024|ultralytics/nn/extra_modules/module/camixer.py
30. ICCV2025|ultralytics/nn/extra_modules/module/ESC.py
31. CVPR2025|ultralytics/nn/extra_modules/module/nnWNet.py
32. TGRS2025|ultralytics/nn/extra_modules/module/ARF.py
33. AAAI2024|ultralytics/nn/extra_modules/module/CFBlock.py
34. IJCV2024|ultralytics/nn/extra_modules/module/FMA.py
35. ultralytics/nn/extra_modules/module/LWGA.py
36. TGRS2025|ultralytics/nn/extra_modules/module/CSSC.py
37. TGRS2025|ultralytics/nn/extra_modules/module/CNCM.py
38. ICCV2025|ultralytics/nn/extra_modules/module/HFRB.py
39. ICIP2025|ultralytics/nn/extra_modules/module/EVA.py
40. CVPR2025|ultralytics/nn/extra_modules/module/IEL.py
41. MICCAI2023|ultralytics/nn/extra_modules/module/MFEBlock.py
42. AAAI2026|ultralytics/nn/extra_modules/module/PartialNetBlock.py
43. TGRS2025|ultralytics/nn/extra_modules/module/DRG.py
44. ultralytics/nn/extra_modules/module/Wave2D.py
45. TGRS2025|ultralytics/nn/extra_modules/module/GLGM.py
46. TGRS2025|ultralytics/nn/extra_modules/module/MAC.py
47. AAAI2026|ultralytics/nn/extra_modules/module/SPJFB.py
- ultralytics/nn/extra_modules/block
1. ultralytics/nn/extra_modules/block/CSPBlock.py
2. TPAMI2025|ultralytics/nn/extra_modules/block/MANet.py
3. TPAMI2024|ultralytics/nn/extra_modules/block/MetaFormer.py
- ultralytics/nn/extra_modules/transformer
1. ICLR2025|ultralytics/nn/extra_modules/transformer/PolaLinearAttention.py
2. CVPR2023|ultralytics/nn/extra_modules/transformer/biformer.py
3. CVPR2023|ultralytics/nn/extra_modules/transformer/CascadedGroupAttention.py
4. CVPR2022|ultralytics/nn/extra_modules/transformer/DAttention.py
5. ICLR2022|ultralytics/nn/extra_modules/transformer/DPBAttention.py
6. CVPR2024|ultralytics/nn/extra_modules/transformer/AdaptiveSparseSA.py
7. ultralytics/nn/extra_modules/transformer/GSA.py
8. ultralytics/nn/extra_modules/transformer/RSA.py
9. ECCV2024|ultralytics/nn/extra_modules/transformer/FSSA.py
10. AAAI2025|ultralytics/nn/extra_modules/transformer/DilatedGCSA.py
11. AAAI2025|ultralytics/nn/extra_modules/transformer/DilatedMWSA.py
12. CVPR2024|ultralytics/nn/extra_modules/transformer/SHSA.py
13. IJCAI2024|ultralytics/nn/extra_modules/transformer/CTA.py
14. IJCAI2024|ultralytics/nn/extra_modules/transformer/SFA.py
15. ultralytics/nn/extra_modules/transformer/MSLA.py
16. ACMMM2025|ultralytics/nn/extra_modules/transformer/CPIA_SA.py
17. NN2025|ultralytics/nn/extra_modules/transformer/TokenSelectAttention.py
18. CVPR2025|ultralytics/nn/extra_modules/transformer/TAB.py
19. TPAMI2025|ultralytics/nn/extra_modules/transformer/LRSA.py
20. ICCV2025|ultralytics/nn/extra_modules/transformer/MALA.py
21. ICML2023|ultralytics/nn/extra_modules/transformer/MUA.py
22. ACMMM2025|ultralytics/nn/extra_modules/transformer/EGSA.py
23. ACMMM2025|ultralytics/nn/extra_modules/transformer/SWSA.py
24. AAAI2026|ultralytics/nn/extra_modules/transformer/DHOGSA.py
25. NeurIPS2025|ultralytics/nn/extra_modules/transformer/CBSA.py
26. TGRS2025|ultralytics/nn/extra_modules/transformer/DPWA.py
27. TIP2025|ultralytics/nn/extra_modules/transformer/DWM_MSA.py
28. CVPR2026|ultralytics/nn/extra_modules/transformer/BinaryAttention.py
29. CVPR2025|ultralytics/nn/extra_modules/transformer/wca.py
- ultralytics/nn/extra_modules/mamba
1. AAAI2025|ultralytics/nn/extra_modules/mamba/SS2D.py
2. CVPR2025|ultralytics/nn/extra_modules/mamba/ASSM.py
3. CVPR2025|ultralytics/nn/extra_modules/mamba/SAVSS.py
4. CVPR2025|ultralytics/nn/extra_modules/mamba/MobileMamba/mobilemamba.py
5. CVPR2025|ultralytics/nn/extra_modules/mamba/MaIR.py
6. TGRS2025|ultralytics/nn/extra_modules/mamba/GLVSS.py
7. ICCV2025|ultralytics/nn/extra_modules/mamba/VSSD.py
8. ICCV2025|ultralytics/nn/extra_modules/mamba/TinyViM.py
9. INFFUS2025|ultralytics/nn/extra_modules/mamba/CSI.py
10. TIP2025|ultralytics/nn/extra_modules/mamba/SFMB.py
11. TGRS2025|ultralytics/nn/extra_modules/mamba/GLSS.py
12. TGRS2025|ultralytics/nn/extra_modules/mamba/GLSS2D.py
13. CVPR2026|ultralytics/nn/extra_modules/mamba/TransMixer.py
- ultralytics/nn/extra_modules/mlp
1. CVPR2024|ultralytics/nn/extra_modules/mlp/ConvolutionalGLU.py
2. IJCAI2024|ultralytics/nn/extra_modules/mlp/DFFN.py
3. ICLR2024|ultralytics/nn/extra_modules/mlp/FMFFN.py
4. CVPR2024|ultralytics/nn/extra_modules/mlp/FRFN.py
5. ECCV2024|ultralytics/nn/extra_modules/mlp/EFFN.py
6. WACV2025|ultralytics/nn/extra_modules/mlp/SEFN.py
7. ICLR2025|ultralytics/nn/extra_modules/mlp/KAN.py
8. CVPR2025|ultralytics/nn/extra_modules/mlp/EDFFN.py
9. IJCV2024|ultralytics/nn/extra_modules/mlp/DML.py
10. AAAI2026|ultralytics/nn/extra_modules/mlp/DIFF.py
- ultralytics/nn/extra_modules/neck
1. ultralytics/nn/extra_modules/neck/ASF.py
2. ultralytics/nn/extra_modules/neck/BiFPN.py
3. AAAI2022|ultralytics/nn/extra_modules/neck/CTrans.py
4. ultralytics/nn/extra_modules/neck/EfficientRepBiPAN.py
5. ultralytics/nn/extra_modules/neck/GFPN.py
6. ultralytics/nn/extra_modules/neck/HSFPN.py
7. AAAI2025|ultralytics/nn/extra_modules/neck/HS_FPN.py
8. TPAMI2025|ultralytics/nn/extra_modules/neck/HyperComputeModule.py
9. ultralytics/nn/extra_modules/neck/SlimNeck.py
10. ultralytics/nn/extra_modules/neck/GoldYOLO.py
11. ultralytics/nn/extra_modules/neck/EMBSFPN.py
- ultralytics/nn/extra_modules/featurefusion
1. 自研模块|ultralytics/nn/extra_modules/featurefusion/cgfm.py
2. BMVC2024|ultralytics/nn/extra_modules/featurefusion/msga.py
3. CVPR2024|ultralytics/nn/extra_modules/featurefusion/mfm.py
4. TIP2023|ultralytics/nn/extra_modules/featurefusion/CSFCN.py
5. BIBM2024|ultralytics/nn/extra_modules/featurefusion/mpca.py
6. ACMMM2024|ultralytics/nn/extra_modules/featurefusion/wfu.py
7. CVPR2025|ultralytics/nn/extra_modules/featurefusion/GDSAFusion.py
8. ultralytics/nn/extra_modules/featurefusion/PST.py
9. TGRS2025|ultralytics/nn/extra_modules/featurefusion/MSAM.py
10. INFFUS2025|ultralytics/nn/extra_modules/featurefusion/DPCF.py
11. CVPR2025|ultralytics/nn/extra_modules/featurefusion/LCA.py
12. TGRS2025|ultralytics/nn/extra_modules/featurefusion/HFFE.py
13. TGRS2025|ultralytics/nn/extra_modules/featurefusion/MFPM.py
14. TGRS2025|ultralytics/nn/extra_modules/featurefusion/ERM.py
15. TIP2025|ultralytics/nn/extra_modules/featurefusion/CAFM.py
16. TIP2024|ultralytics/nn/extra_modules/featurefusion/CGAFusion.py
17. IF2023|ultralytics/nn/extra_modules/featurefusion/PSFM.py
18. IF2023|ultralytics/nn/extra_modules/featurefusion/SDFM.py
19. 自研模块|ultralytics/nn/extra_modules/featurefusion/DAF.py
20. 自研模块|ultralytics/nn/extra_modules/featurefusion/CIDAF.py
21. 自研模块|ultralytics/nn/extra_modules/featurefusion/WDAF.py
- ultralytics/nn/extra_modules/norm
1. ICML2024|engine/extre_module/custom_nn/transformer/repbn.py
2. CVPR2025|engine/extre_module/custom_nn/transformer/dyt.py
3. engine/extre_module/custom_nn/norm/derf.py
- ultralytics/nn/extra_modules/featurepreprocess
1. TGRS2025|ultralytics/nn/extra_modules/featurepreprocess/FAENet.py
- ultralytics/nn/extra_modules/head(ultralytics/cfg/models/improve/head)
1. ultralytics/nn/extra_modules/head/LSPCD.py
## Loss 列表
#### 默认配置(兼容)
- cls_loss=bce
- iou_loss=ciou
- iou_aux=none
- cls_loss(分类损失)
1. bce
2. slide
3. ema_slide
4. focal
5. varifocal
6. qualityfocal
- iou_loss(IoU主损失)
1. 基础形式:
iou、giou、diou、ciou、eiou、siou、shapeiou、piou、piou2
2. Inner形式:
inner_<base>(例如:inner_diou、inner_ciou、inner_siou)
3. Focaler形式:
focaler_<base>(例如:focaler_diou、focaler_ciou、focaler_siou)
4. MPDIoU家族:
mpdiou、inner_mpdiou、focaler_mpdiou
5. WiseIoU家族:
wiseiou(等价wiseiou_wiou)
wiseiou_<variant>
wiseiou_inner_<variant>
wiseiou_focaler_<variant>
6. wise <variant> 可选值:
iou、wiou、giou、diou、ciou、eiou、siou、shapeiou、piou、piou2、mpdiou
- iou_aux(IoU辅助损失)
1. none
2. gcd
3. nwd
## 更新公告
- 20260217
1. 初版项目发布.
2. 新增使用教程、模块改进使用教程视频.
- 20260228
1. 新增常见的cls和iou的损失,并直接支持在train.py里面指定,并且在训练的时候会打印目前的loss.
2. 对模型改进的yaml扩展到yolov8、yolov10、yolo11、yolo12.
3. 新增在训练过程中mAP75输出.
4. 优化detect.py中的特征图保存机制,使其可以单独保存每一个通道的特征图和总通道求和的特征图.
5. 新增毕业必备-基于web的可视化界面,支持选择模型、检测图片、检测视频,显示目标数量等等功能
6. 新增web界面的教程视频.
7. 新增注册module的教程视频.
- 20260308
1. 在val.py脚本中增加auto_coco_eval指标,支持一步到位计算COCO指标,不再需要人为转换标签和对齐标签!
2. 新增AAAI2026-SPJFB模块.
3. 新增TGRS2025-GLSS2D模块.
4. 新增TIP2025-CAFM模块.
5. 新增TIP2025-DWM_MSA模块.
6. 新增DynamicERF模块.
7. 新增CSP、MetaFormer、Module在yaml中的使用教程-20260307补充版的视频.
8. 修复用户反馈的bug.
- 20260315
1. 新增CVPR2026-DEGConv模块。
2. 新增CVPR2026-BinaryAttention模块。
3. 新增CVPR2026-TransMixer模块。
4. 新增CVPR2025-wca模块。
5. 新增自研模块-DAF模块。
6. 新增自研模块-CIDAF模块。
7. 新增自研模块-WDAF模块。
8. 新增Neck部分内容(ASF、BIFPN、CTrans、ERepBIFPN、GFPN、HSFPN、HS-FPN、超图FPN、SlimNeck、GoldYOLO、EMBSFPN)。
9. 补全attention部分的配置文件。
10. 新增conv、attention的内容如何与CSP模块随意组合的使用教程。
11. 修复用户反馈的bug。
================================================
FILE: bilibili-guide.md
================================================
# 魔鬼面具-哔哩哔哩视频指南
### 必看干货系列(建议搞深度学习的小伙伴都看看,特别是图像相关)
1. [深度学习常见实验问题与实验技巧(适用于所有模型,小白初学者必看!)](https://www.bilibili.com/video/BV17j41147j8/)
2. [还在迷茫深度学习中的改进实验应该从哪里开始改起的同学,一定要进来看看了!用自身经验给你推荐实验顺序!](https://www.bilibili.com/video/BV1Nu4y1G7B9/)
3. [探究深度学习中预训练权重对改进和精度的影响!](https://www.bilibili.com/video/BV1FH4y1o7GL/)
4. [什么?你说你不会画模型结构图?行吧,那你进来看看吧,手把手教你画YAML结构图!](https://www.bilibili.com/video/BV1X94y1K76Z/)
5. [探究深度学习中训练中的可重现性](https://www.bilibili.com/video/BV1Nu4y1s7sc/)
6. [什么?你说你更换主干后看不懂配置文件也不懂画结构图?那你快点进来看看了!](https://www.bilibili.com/video/BV1WA4m1V7nQ/)
7. [从三个角度分析,什么条件才算是一个合格的改进专栏!](https://www.bilibili.com/video/BV1E6421g7eb/)
8. [都2024了,你写论文不会还只用p,r,map这些指标分析目标检测模型吧?](https://www.bilibili.com/video/BV1wF4m177JQ/)
9. [从简到难手把手教你画Pytorch模块内的结构图!](https://www.bilibili.com/video/BV1dC411p7H7/)
10. [深度学习论文实验中的其中一大注意点-预训练权重究竟加还是不加?](https://www.bilibili.com/video/BV1Q1421Q7Zw/)
11. [深度学习改进实验必看!基于YOLOV8的WIDER-FACE改进(轻量化+提点)实验思路讲解](https://www.bilibili.com/video/BV1QJ4m1H7DJ/)
12. [YOLOV8-硬塞注意力机制?这样做没创新!想知道注意力怎么用才有创新那赶快来看看!](https://www.bilibili.com/video/BV1bm421K7tf/)
13. [YOLOV8改进-还硬塞注意力机制?这期用注意力机制手把手给大家自研一个ContextGuideFPN!创新真的不难,需要找对方法!](https://www.bilibili.com/video/BV1Vx4y1n7hZ/)
14. [长达46分钟的肺腑之言!给以后想从事图像算法工程师、小白入门深度学习路线的总结!](https://www.bilibili.com/video/BV16y411h7T9/)
15. [提升多少才能发paper?轻量化需要看什么指标?需要轻量化到什么程度才能发paper?这期给大家一一解答!](https://www.bilibili.com/video/BV1QZ421M7gu/)
16. [深度学习实验部分常见疑问解答!(小白刚入门必看!少走弯路!少自我内耗!)](https://www.bilibili.com/video/BV1Bz421B7pC/)
```
1. 如何衡量自己所做的工作量够不够?
2. 为什么别人的论文说这个模块对xxx有作用,但是我自己用的时候还掉点了?
3. 提升是和什么模型相比呢 比如和yolov8这种基础模型比还是和别人提出的目前最好的模型比
4. 对比不同的模型的时候,输入尺寸,学习率,学习次数这些是否需要一致?
```
17. [深度学习实验部分常见疑问解答二!(小白刚入门必看!少走弯路!少自我内耗!)](https://www.bilibili.com/video/BV1ZM4m1m785/)
```
1. 为什么我用yolov8自带的coco8、coco128训练出来的效果很差?
2. 我的数据集很大,机器跑得慢,我是否可以用数据集的百分之10的数据去测试这个改进点是否有效?有效再跑整个数据集?
```
18. [深度学习实验部分常见疑问解答三!(怎么判断模型是否收敛?模型过拟合怎么办?)](https://www.bilibili.com/video/BV11S421d76P/)
19. [YOLO系列模型训练结果详细解答!(训练过程的一些疑问,该放哪个文件运行出来的结果、参数量计算量在哪里看..等等问题)](https://www.bilibili.com/video/BV11b421J7Vx/)
20. [细谈目标检测中的小目标检测头和大目标检测检测头,并教懂你怎么加微小目标、极大目标检测头!](https://www.bilibili.com/video/BV1jkDWYFEwx/)
21. [深度学习炼丹必备必看必须知道的小技巧!](https://www.bilibili.com/video/BV1q3SZYsExc/)
22. [深度学习实验准备-数据集怎么选?有哪些需要注意的点?](https://www.bilibili.com/video/BV11zySYvEhs/)
23. [深度学习论文实验中新手非常容易陷入的一个误区:抱着解决xxx问题的心态去做实验](https://www.bilibili.com/video/BV1kkkvYJEHG/)
24. [小目标检测必看系列 | 除了AP-Small指标,可还有AP-VeryTiny、AP-Tiny的指标喔~手把手带你加!](https://www.bilibili.com/video/BV1CYcUeBEzY/)
25. [YOLO中的实例分割原来是这样巧妙地实现的!你在做YOLO-Seg但是又不知道的话,那你要进来看看咯~](https://www.bilibili.com/video/BV1SkP1e1EHC/)
26. [长达30分钟的吐血讲解!为什么别人的纯YOLO小目标检测能上AAAI2025,你的连个最差的都费劲!看看差距在哪里,怎么改善!](https://www.bilibili.com/video/BV14DJazTEtV)
27. [深度学习论文中的基础实验、改进实验、 消融实验、对比实验、泛化实验|这些究竟是什么?](https://www.bilibili.com/video/BV1NYKUz2E6b/)
28. [深度学习论文中的推理结果图、热力图、特征图究竟应该怎么放?需要注意什么?有什么作用?](https://www.bilibili.com/video/BV1s5gQzcEPh/)
29. [YOLO|RTDETR|我会跑Ultralytics了!但是输出的这些都怎么看呀?论文中的结果写什么呀?需要注意什么呀?](https://www.bilibili.com/video/BV1VfbVzHEGM/)
### 服务器租用系列
1. [|DAModel|竟然有一个"不需要装环境就能跑YOLO代码"的服务器平台?让我们一起来看看!](https://www.bilibili.com/video/BV1mg2SYGEGF)
2. [|DAModel|给大家准备好COCO、VOC、VisDrone、CrowdHuman、BDD100K数据集啦~YOLO格式和data.yaml都已配置好~](https://www.bilibili.com/video/BV1UV5qzuEGf)
3. [智算云扉服务器平台|0.99每小时的3090?RTX4090-48GB的显卡?已经配置好的YOLO|RTDETR环境?充值还有额外算力点?标题有限制优势说不完。](https://www.bilibili.com/video/BV11DXTYiENS)
### 必看论文分享系列
1. [有营养的必看论文分享系列一-RTMDet<考虑到精度、速度、部署的2D目标检测网络>](https://www.bilibili.com/video/BV1ab421J77G/)
2. [有营养的必看论文分享系列二-MobileNets<轻量化的开山之作>](https://www.bilibili.com/video/BV1hM4m117JW/)
3. [计算机视觉|YOLO|DETR|2025创新必看的论文之一|MetaFormer(TPAMI2024),选对Baseline是成功的第一步](https://www.bilibili.com/video/BV1W5ATetEg6/)
### 高区论文带读系列
1. [高区论文带读系列一-40分钟长视频带你分析一篇SCI1区的文章,SCI1区也不是触不可及!](https://www.bilibili.com/video/BV1JESuYxEjn/)
2. [高区论文带读系列二-学会捕捉数据集场景下的要害问题是写好文章的第一步!](https://www.bilibili.com/video/BV1XNqjYNEyg/)
### YOLO系列配置文件系列
1. [不会把多个改进整合到一个yaml配置文件里面?那来看看这个吧!从简到难手把手带你整合三个yaml](https://www.bilibili.com/video/BV15H4y1Y7a2/)
2. [细谈目标检测中的小目标检测头和大目标检测检测头,并教懂你怎么加微小目标、极大目标检测头!](https://www.bilibili.com/video/BV1jkDWYFEwx/)
3. [不会看YOLO的模型yaml配置文件?那你还怎么整合多个配置文件!](https://www.bilibili.com/video/BV1oiBRYnEEw/)
4. [不会把多个创新点整合到一个yaml配置文件里面?那来看看这个吧!手把手带你整合创新点!](https://www.bilibili.com/video/BV1DUBRYGE3b/)
### YOLOV5,V7-PYQT5项目讲解
1. [哔哩哔哩合集地址](https://space.bilibili.com/286900343/channel/collectiondetail?sid=917275)
2. [项目github地址](https://github.com/z1069614715/yolov7-pyqt)
### YOLOV5、V7、V8、V9、V10、V11、V12 热力图源码
1. [哔哩哔哩合集地址](https://space.bilibili.com/286900343/channel/collectiondetail?sid=1080305)
2. [项目github地址](https://github.com/z1069614715/objectdetection_script/blob/master/yolo-gradcam)
### YOLO系列模型使用教程系列
1. [YOLOV7保姆级教程](https://www.bilibili.com/video/BV1gD4y1s7zw/?spm_id_from=333.999.0.0)
2. [YOLOV5-Seg实例分割教程](https://www.bilibili.com/video/BV1nV4y1P7HQ/?spm_id_from=333.999.0.0)
3. [YOLOV5-快速上手教程](https://www.bilibili.com/video/BV1tM411a7it/?spm_id_from=333.999.0.0)
4. [YOLOV8-OBB详细教学视频(包含如何把DOTA数据集分割成小图进行训练)](https://www.bilibili.com/video/BV1xK4y117fg/)
5. [EfficientTeacher半监督-详细教学和调参注意事项](https://www.bilibili.com/video/BV1494y1v7hF/)
6. [YOLOV9保姆级别教程来啦~包含环境配置、数据集转换、训练、测试、推理环节~一看就懂!](https://www.bilibili.com/video/BV1d1421z7XW/)
7. [保姆级别YOLOV11-环境配置、 数据集介绍、训练、验证、推理 详细教学视频,看了它,跑YOLOV11 没问题~](https://www.bilibili.com/video/BV1VA11YBELB/)
### YOLOV8V11源码常见疑问解答小课堂
1. [关于配置文件中Optimizer参数为auto的时候,究竟Optimizer会怎么选用呢?](https://www.bilibili.com/video/BV1K34y1w7cZ/)
2. [best.pt究竟是根据什么指标来保存的?](https://www.bilibili.com/video/BV1jN411M7MA/)
3. [数据增强在yolov8中的应用](https://www.bilibili.com/video/BV1aQ4y1g7ah/)
4. [如何添加FPS计算代码和FPS的相关的一些疑问](https://www.bilibili.com/video/BV1Sw411g7DD/)
5. [预测框粗细颜色修改与精度小数位修改](https://www.bilibili.com/video/BV12K421a7rH/)
6. [导出改进/剪枝的onnx模型和讲解onnx-opset和onnxsim的作用](https://www.bilibili.com/video/BV1CK421e7Y3/)
7. [YOLOV8模型详细讲解(包含该如何改进YOLOV8)(刚入门小白,需要改进YOLOV8的同学必看!)](https://www.bilibili.com/video/BV1Ms421u7VH/)
8. [学习率变化问题](https://www.bilibili.com/video/BV1frnferEL1/)
### 目标检测干货系列
1. [深入了解目标检测中的检测头](https://www.bilibili.com/video/BV1AQ4y1j7Cr/)
2. [目标检测中的标签分配策略做了什么?分配过程中的正负样本又是什么?](https://www.bilibili.com/video/BV1Ek4aeUE2J/)
### 环境配置系列教程
1. [保姆式AUTODL-YOLO环境教程(上):从0教你如何配置VSCODE、安装新环境和CUDA和CUDNN、跑通YOLOV8、编译DCNV3](https://www.bilibili.com/video/BV1tT4y1b75q/)
2. [保姆式AUTODL-YOLO环境教程(下):从0教你如何配置VSCODE、安装新环境和CUDA和CUDNN、跑通YOLOV8、编译DCNV3](https://www.bilibili.com/video/BV1nV411Q7mA/)
### 目标检测Tricks
1. [可视化并统计目标检测中的TP,FP,FN](https://www.bilibili.com/video/BV1yM4y1d7Gp/)
2. [深度学习小实验-卷积家族(fps,flops,param)对比实验](https://www.bilibili.com/video/BV1UL411R7Qr/)
3. [yolov5中的FeatureMap可视化(热力图格式)](https://www.bilibili.com/video/BV1LV4y1R7w6/)
4. [用于yolov5和v7中的yolo格式转换coco格式的脚本.](https://www.bilibili.com/video/BV14T411s7Ts/)
5. [Segment Anything演示代码](https://www.bilibili.com/video/BV1hv4y1H7eg/)
6. [固定随机种子在同一个主机上尽可能地复现结果](https://www.bilibili.com/video/BV1bh4y1n7Yc/)
7. [计算yolov5推理时间和FPS的脚本](https://www.bilibili.com/video/BV1Uu4y1C714/)
8. [计算yolov7推理时间和FPS的脚本](https://www.bilibili.com/video/BV17p4y177Pe/)
9. [深度学习小实验-YOLO-Block家族(fps,flops,param)对比实验.](https://www.bilibili.com/video/BV17H4y1V7s9/)
10. [输出YOLOV8、RTDETR各个层的计算量和参数量.](https://www.bilibili.com/video/BV1tb421b7aB/)
11. [YOLOV8-不会把PR曲线的数据保存并绘制到一张图?不用怕,手把手教程来啦~](https://www.bilibili.com/video/BV1uC41177oE/)
12. [yolov5、v7、v8、v9、v10曲线对比图、推理时间vs精度对比图绘制手把手教程!](https://www.bilibili.com/video/BV1yf421X7t5/)
13. [YOLOV8-输出每一层的特征图尺寸和通道数.](https://www.bilibili.com/video/BV1Mz421B7xz/)
14. [YOLOV8V10V11V12更详细的输出精度结果](https://www.bilibili.com/video/BV1dBQDY6Ec5/)
15. [关于数据集的可视化脚本](https://www.bilibili.com/video/BV1k2TizGEnH/)
### MMDet系列教程
1. [一库打尽目标检测对比实验!mmdetection环境、训练、测试手把手教程!](https://www.bilibili.com/video/BV1xA4m1c7H8/)
2. [一库打尽目标检测对比实验!mmdetection参数量、计算量、FPS、绘制logs手把手教程](https://www.bilibili.com/video/BV17C41137dW/)
3. [一库打尽目标检测对比实验!mmdetection指标转换YOLO指标!](https://www.bilibili.com/video/BV1AWtCesEc6/)
### 离线数据增强教程
1. [目标检测数据集离线数据增强教程,包含对目标框、多种变换、天气变化等等增强!](https://www.bilibili.com/video/BV1bT421k7iq/)
2. [语义分割数据集离线数据增强教程,包含对mask、多种变换、天气变化等等增强!](https://www.bilibili.com/video/BV1xi421a7Gb/)
3. [CVPR2025-SaMam|手把手带你用以Mamba为核心的任意风格迁移网络去做数据集扩充!(一个小创新点有了!)](https://www.bilibili.com/video/BV1gWE4z4Eqq/)
### YOLO系列(YOLOV5,YOLOV7,YOLOV8)模型改进大合集
#### YOLOV5(主干系列修改V7同样也适用)
1. [添加EIOU,SIOU,ALPHA-IOU, FocalEIOU到yolov5的box_iou中](https://www.bilibili.com/video/BV1KM411b7Sz/)
2. [Wise-IoU](https://www.bilibili.com/video/BV1tG4y1N7Gk/)
3. [使用DAMO-YOLO中的GFPN替换YOLOV5中的Head](https://www.bilibili.com/video/BV1iR4y1a7bx/)
4. [使用DAMO-YOLO中的GFPN替换YOLOV5中的Head](https://www.bilibili.com/video/BV1iR4y1a7bx/)
5. [使用yolov8中的C2F模块替换yolov5中的C3模块.](https://www.bilibili.com/video/BV1rx4y1g7xt/)
6. [添加Optimal Transport Assignment到yolov5的Loss中](https://www.bilibili.com/video/BV1xD4y1J76n/)
7. [添加Deformable convolution V2到yolov5中](https://www.bilibili.com/video/BV1rT411Q76q/)
8. [添加辅助训练分支到yolov5中](https://www.bilibili.com/video/BV1Fo4y1v7bi/)
9. [添加context augmentation module到yolov5中](https://www.bilibili.com/video/BV17b411d7ef/)
10. [添加SAC到yolov5中](https://www.bilibili.com/video/BV1xD4y1u7NU/)
11. [添加CoordConv到yolov5中](https://www.bilibili.com/video/BV1ng4y1E7rS/)
12. [添加soft-nms(IoU,GIoU,DIoU,CIoU,EIoU,SIoU)到yolov5中](https://www.bilibili.com/video/BV1cM41147Ry/)
13. [添加DSConv到yolov5中](https://www.bilibili.com/video/BV1iT411a7Mi/)
14. [添加DCNV3到yolov5中.](https://www.bilibili.com/video/BV1LY411z7iE/)
15. [添加Normalized Gaussian Wasserstein Distance到yolov5中.](https://www.bilibili.com/video/BV1zY4y197UP/)
16. [添加Efficient-DecoupledHead到yolov5中](https://www.bilibili.com/video/BV1mk4y1h7us/)
17. [添加FasterNet中的Faster-Block到yolov5中](https://www.bilibili.com/video/BV1Bs4y1H7Ph/)
18. [添加Timm支持的主干到yolov5中.](https://www.bilibili.com/video/BV1Mx4y1A7jy/)
19. [添加Task-Specific Context Decoupling到yolov5中](https://www.bilibili.com/video/BV1mk4y1h7us/)
20. [添加FasterNet主干到yolov5中](https://www.bilibili.com/video/BV1ra4y1K77u/)
21. [添加Omni-Dimensional Dynamic Convolution主干(od_mobilenetv2,od_resnet)到yolov5中](https://www.bilibili.com/video/BV1Jk4y1v7EW/)
22. [融合Omni-Dimensional Dynamic Convolution主干(od_mobilenetv2,od_resnet)中的Conv和BN](https://www.bilibili.com/video/BV1Rs4y1N7fp/)
23. [添加轻量级上采样算子CARAFE到yolov5中](https://www.bilibili.com/video/BV1kj411c72a/)
24. [添加CFPNet中的EVC-Block到yolov5中](https://www.bilibili.com/video/BV1Pg4y1u7cM/)
25. [添加基于注意力机制的目标检测头(DYHEAD)到yolov5中](https://www.bilibili.com/video/BV1qs4y117Mx/)
26. [添加(2023年New)InceptionNeXt主干到yolov5中](https://www.bilibili.com/video/BV12v4y1H7E1/)
27. [添加aLRPLoss到yolov5中](https://www.bilibili.com/video/BV1YV4y1Z7rV/)
28. [结合Res2Net提出具有多尺度提取能力的C3模块](https://www.bilibili.com/video/BV13X4y167VB/)
29. [添加(2022年)FocalNet(transformer)主干到yolov5中](https://www.bilibili.com/video/BV1ch411L7Dk/)
30. [添加(2023年)EMO(transformer)主干到yolov5中](https://www.bilibili.com/video/BV1Dh4y1J7SV/)
31. [添加(2022年)EfficientFormerV2(transformer)主干到yolov5中](https://www.bilibili.com/video/BV1da4y1g7KT/)
32. [添加(2022年CVPR)PoolFormer(transformer)主干到yolov5中](https://www.bilibili.com/video/BV1eh411c7bz/)
33. [添加(2023年)EfficientViT(transformer)主干到yolov5中](https://www.bilibili.com/video/BV1xk4y1L7Gu/)
34. [添加ContextAggregation到yolov5中](https://www.bilibili.com/video/BV1Yk4y1s7Kx/)
35. [添加(2023年)VanillaNet主干到yolov5中](https://www.bilibili.com/video/BV1os4y1v7Du/)
36. [添加(2022年)NextViT主干到yolov5中](https://www.bilibili.com/video/BV1im4y1i7Ht/)
37. [添加(2023年)RIFormer主干到yolov5中](https://www.bilibili.com/video/BV1bW4y1X7Lo/)
38. [Scale-Aware RFE与C3结合而成的C3RFEM添加到yolov5中](https://www.bilibili.com/video/BV1Gj411D7Pf/)
39. [把重参数结构DiverseBranchBlock与C3融合成C3-DBB添加到yolov5中](https://www.bilibili.com/video/BV1sM4y177Cn/)
40. [添加(2023CVPR)EfficientViT(transformer)主干到yolov5中](https://www.bilibili.com/video/BV1xk4y1L7Gu/)
41. [添加(2023旋转目标检测SOTA)LSKNet主干到yolov5中](https://www.bilibili.com/video/BV1xk4y1L7Gu/)
42. [添加(2023最新IoU度量算法)MPDiou到yolov5中.](https://www.bilibili.com/video/BV19P41147gJ/)
43. [添加Yolo-Face-V2中的SlideLoss到yolov5中](https://www.bilibili.com/video/BV1W14y1i79U/)
44. [添加RepViT(transformer)主干到yolov5中](https://www.bilibili.com/video/BV1PH4y1S7mf/)
45. [利用华为2023最新GOLD-YOLO中的Gather-and-Distribute进行改进YOLOV5中的特征融合模块](https://www.bilibili.com/video/BV1PH4y1S7mf/)
46. [利用动态蛇形卷积改进YOLOV5](https://www.bilibili.com/video/BV1Qu411K7Hw/)
47. [利用带有位置信息编码的AIFI自注意力机制改进YOLOV5](https://www.bilibili.com/video/BV1nu4y1h7eS/)
48. [添加UniRepLKNet主干到yolov5中](https://www.bilibili.com/video/BV1PH4y1S7mf/)
49. [添加Attentional Scale Sequence Fusion到yolov5中](https://www.bilibili.com/video/BV1PH4y1S7mf/)
50. [添加cross-scale feature-fusion到yolov5中](https://www.bilibili.com/video/BV1Tb4y1P7yd/)
51. [添加对小目标有效的BiFormer注意力机制到yolov5中](https://www.bilibili.com/video/BV15g4y1g7bM/)
52. [引入最新SOTA(YOLOV9)中的RepNCSPELAN模块](https://www.bilibili.com/video/BV17y421z73k/)
#### YOLOV7
1. [添加EIOU,SIOU,ALPHA-IOU, FocalEIOU到yolov7的box_iou中](https://www.bilibili.com/video/BV1zx4y177EF/)
2. [Wise-IoU](https://www.bilibili.com/video/BV1yv4y147kf/)
3. [添加Deformable convolution V2到yolov7中](https://www.bilibili.com/video/BV17R4y1q7vr/)
4. [添加SAC到yolov7中](https://www.bilibili.com/video/BV1xD4y1u7NU/)
5. [添加CoordConv到yolov7中](https://www.bilibili.com/video/BV1K54y1g7ye/)
6. [添加soft-nms(IoU,GIoU,DIoU,CIoU,EIoU,SIoU)到yolov7中](https://www.bilibili.com/video/BV1ZY41167iC/)
7. [添加DSConv到yolov7中](https://www.bilibili.com/video/BV1724y1b7PD/)
8. [添加DCNV3到yolov7中.](https://www.bilibili.com/video/BV1mk4y1h7us/)
9. [添加Normalized Gaussian Wasserstein Distance到yolov7中](https://www.bilibili.com/video/BV1kM411H7g1/)
10. [添加具有隐式知识学习的Efficient-DecoupledHead到yolov7中](https://www.bilibili.com/video/BV1tg4y1x7ha/)
11. [添加FasterNet中的PConv到yolov7中](https://www.bilibili.com/video/BV1Z84y137oi/)
12. [添加轻量级上采样算子CARAFE到yolov7中.](https://www.bilibili.com/video/BV1yc411p7wL/)
13. [添加基于注意力机制的目标检测头(DYHEAD)到yolov7中](https://www.bilibili.com/video/BV1Ph4y1s7i9/)
14. [添加Omni-Dimensional Dynamic Convolution到yolov7中](https://www.bilibili.com/video/BV1vh411j71Z/)
15. [添加CFPNet中的EVC-Block到yolov7中](https://www.bilibili.com/video/BV12u4y1f7np/)
16. [P2,P6检测层在YOLOV7中的添加](https://www.bilibili.com/video/BV1LX4y1a72m/)
17. [使用VOVGSCSP轻量化yolov7的Neck](https://www.bilibili.com/video/BV14m4y147PC/)
18. [添加SwinTransformer-Tiny主干到yolov7中](https://www.bilibili.com/video/BV1WX4y1a7ea/)
19. [Scale-Aware RFE添加到yolov7中](https://www.bilibili.com/video/BV1hW4y1D7gQ/)
20. [把重参数结构DiverseBranchBlock添加到yolov7中](https://www.bilibili.com/video/BV14u411b7kL/)
21. [添加(2023最新IoU度量算法)MPDiou到yolov7中](https://www.bilibili.com/video/BV1Qh4y1r7D3/)
22. [利用华为2023最新GOLD-YOLO中的Gather-and-Distribute进行改进YOLOV7中的特征融合模块.](https://www.bilibili.com/video/BV14V411c7H1/)
23. [利用动态蛇形卷积改进YOLOV7](https://www.bilibili.com/video/BV1Wj411x7fq/)
24. [利用带有位置信息编码的AIFI自注意力机制改进YOLOV7](https://www.bilibili.com/video/BV1rj411a7s4/)
25. [添加Attentional Scale Sequence Fusion到yolov7中](https://www.bilibili.com/video/BV1PH4y1S7mf/)
26. [引入最新SOTA(YOLOV9)中的RepNCSPELAN模块](https://www.bilibili.com/video/BV1UA4m137hz/)
#### YOLOV8
1. [添加EIOU,SIOU,ALPHA-IOU, FocalEIOU到yolov5,yolov8的box_iou中](https://www.bilibili.com/video/BV1PY4y1o7Hm/)
2. [Wise-IoU](https://www.bilibili.com/video/BV1De4y1N7Mb/)
3. [添加Deformable convolution V2到yolov8中](https://www.bilibili.com/video/BV1Fo4y1i7Mm/)
4. [最新~YOLOV8手把手教学配置文件添加注意力机制!一看就会!](https://www.bilibili.com/video/BV1RH4y1D7CY/)
5. [YOLOV8改进-手把手带你学会注意力机制进阶用法](https://www.bilibili.com/video/BV1ZQ4y1J7oC/)
6. [YOLOV8可视化-可视化并统计每张图的True Positive、False Positive、False Negative](https://www.bilibili.com/video/BV1RA4m1L79K/)
7. [YOLOV8-基于VisDrone的TaskAlignedAssigner任务对齐分配策略的调参实验](https://www.bilibili.com/video/BV1XJ4m1x7eJ/)
8. [YOLOV8-不会把多个改进整合到一个yaml配置文件里面?那来看看这个吧!从简到难手把手带你整合三个yaml](https://www.bilibili.com/video/BV15H4y1Y7a2/)
9. [YOLOV8下游任务系列-一步一步DEBUG保姆式带你完成目标计数](https://www.bilibili.com/video/BV17H4y1J7DD/)
10. [YOLOV8改进-带你分析V8的检测头并重设计10种结构轻量化检测头](https://www.bilibili.com/video/BV1cu411K7FE/)
11. [从CVPR2022-RepLKNet分析有效感受野,并提供YOLOV8可视化感受野的脚本和讲解~](https://www.bilibili.com/video/BV1Gx4y1v7ZZ/)
12. [YOLOV8-不会把PR曲线的数据保存并绘制到一张图?不用怕,手把手教程来啦~](https://www.bilibili.com/video/BV1uC41177oE/)
13. [YOLOV8应用NMS-Free效果怎么样?在Visdrone2019数据集上进行实验,效果不错!后处理时间为0.0ms!](https://www.bilibili.com/video/BV1bt421N7ob/)
14. [YOLOV8-NMSFree|更多公开数据集测试!VisDrone、VOC、PCB](https://www.bilibili.com/video/BV1nZ421x7jr/)
15. [YOLOV8模型详细讲解(包含该如何改进YOLOV8)(刚入门小白,需要改进YOLOV8的同学必看!)](https://www.bilibili.com/video/BV1Ms421u7VH/)
#### YOLOV9
1. [YOLOV9-VisDrone实验对比结果来啦!YOLOV9-C模型VisDrone测试集精度为39.7!有兴趣进来看看具体啦!](https://www.bilibili.com/video/BV1Yy42187A3/)
2. [从源码分析YOLOV9比YOLOV7多了什么内容!](https://www.bilibili.com/video/BV1v1421f7rN/)
3. [YOLOV9n VS YOLOV8n,在VisDrone数据集上精度有2.4个点的提升!](https://www.bilibili.com/video/BV16m411f78L/)
4. [YOLOV9改进-更换轻量化王者MobilenetV4-Backbone](https://www.bilibili.com/video/BV1Ax4y1B7Ln/)
5. [YOLOV9改进-CVPR2024-StarNet、DRepCSPELAN](https://www.bilibili.com/video/BV1BU411o7rz/)
6. [YOLOV9改进-CVPR2023-FasterNet以及其FasterBlock、PConv的改进](https://www.bilibili.com/video/BV18y411a74y/)
7. [YOLOV9改进-DySnakeConv动态蛇形卷积、针对长条形不规则物体!](https://www.bilibili.com/video/BV1gi421S77X/)
#### YOLOV11
1. [Ultralytics8.3.0沉浸式讲解-YOLOV11针对代码的详细剖析](https://www.bilibili.com/video/BV19XxxeXEma/)
2. [保姆级别YOLOV11-环境配置、 数据集介绍、训练、验证、推理 详细教学视频,看了它,跑YOLOV11 没问题~](https://www.bilibili.com/video/BV1VA11YBELB/)
3. [YOLOV11改进详细分析(改进前必看),每个部分(Backbone、Neck、Head....)有哪些地方可以改进?改进的时候要避免小白三件套!](https://www.bilibili.com/video/BV1GKCdYbEuz/)
#### YOLOV13
1. [哎哟你干嘛!YOLO又又又又出新版本了,YOLOV13来了!我们来看看YOLOV13改进了什么,对正在做YOLO改进的同学有什么影响?](https://www.bilibili.com/video/BV1jqKbzGEua/)
#### D-Fine-ICLR2025
1. [暴打CVPR2024-RTDETR的D-Fine究竟性能如何?我们一起来训练看看~](https://www.bilibili.com/video/BV1aE6aYHEer/)
#### DEIM-CVPR2025
1. [CVPR2025-DEIM|新一代目标检测SOTA|2025发高区论文必备的baseline|训练、测试、10几集的基础改进课程、画图教程系列](https://space.bilibili.com/286900343/lists/4909499)
================================================
FILE: cv-attention/A2Attention.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
from torch.nn import functional as F
class DoubleAttention(nn.Module):
    """Double attention block: gather global descriptors, then distribute them.

    Three 1x1 convolutions produce features ``A`` (``c_m`` channels), attention
    maps ``B`` (``c_n`` channels) and attention vectors ``V`` (``c_n`` channels).
    ``A`` is pooled with ``B`` into ``c_m x c_n`` global descriptors, which are
    then redistributed to every location using ``V``.

    Args:
        in_channels: number of channels of the input feature map.
        c_m: output channels of ``convA``.
        c_n: output channels of ``convB`` and ``convV``.
        reconstruct: when True, a 1x1 conv maps the ``c_m`` channels of the
            result back to ``in_channels``.
    """

    def __init__(self, in_channels, c_m=128, c_n=128, reconstruct=True):
        super().__init__()
        self.in_channels = in_channels
        self.reconstruct = reconstruct
        self.c_m = c_m
        self.c_n = c_n
        self.convA = nn.Conv2d(in_channels, c_m, 1)
        self.convB = nn.Conv2d(in_channels, c_n, 1)
        self.convV = nn.Conv2d(in_channels, c_n, 1)
        if self.reconstruct:
            self.conv_reconstruct = nn.Conv2d(c_m, in_channels, kernel_size=1)
        self.init_weights()

    def init_weights(self):
        """Initialise conv / batch-norm / linear sub-modules in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        """Apply double attention.

        Args:
            x: tensor of shape (b, in_channels, h, w).

        Returns:
            Tensor of shape (b, in_channels, h, w) when ``reconstruct`` is
            True, otherwise (b, c_m, h, w).
        """
        b, c, h, w = x.shape
        assert c == self.in_channels
        A = self.convA(x)  # b,c_m,h,w
        B = self.convB(x)  # b,c_n,h,w
        V = self.convV(x)  # b,c_n,h,w
        tmpA = A.view(b, self.c_m, -1)
        # The softmax axis must be explicit: for a 3-D tensor the implicit
        # (deprecated) default is dim=0, which would normalise across the
        # batch and make every sample depend on the others.
        # Each attention map is a distribution over the h*w locations.
        attention_maps = F.softmax(B.view(b, self.c_n, -1), dim=-1)
        # Each location gets a distribution over the c_n descriptors.
        attention_vectors = F.softmax(V.view(b, self.c_n, -1), dim=1)
        # step 1: feature gating
        global_descriptors = torch.bmm(tmpA, attention_maps.permute(0, 2, 1))  # b,c_m,c_n
        # step 2: feature distribution
        tmpZ = global_descriptors.matmul(attention_vectors)  # b,c_m,h*w
        tmpZ = tmpZ.view(b, self.c_m, h, w)  # b,c_m,h,w
        if self.reconstruct:
            tmpZ = self.conv_reconstruct(tmpZ)
        return tmpZ
if __name__ == '__main__':
    # Smoke test: push a random batch through the block and report the output shape.
    feats = torch.randn(50, 512, 7, 7)
    block = DoubleAttention(512)
    print(block(feats).shape)
================================================
FILE: cv-attention/BAM.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Return the padding that keeps the output the same shape as the input.

    ``k`` may be an int or a sequence of ints. An explicit ``p`` is returned
    unchanged; otherwise the padding is half of the (dilated) kernel size.
    """
    scalar_kernel = isinstance(k, int)
    if d > 1:
        # Effective kernel size once dilation is taken into account.
        if scalar_kernel:
            k = d * (k - 1) + 1
        else:
            k = [d * (x - 1) + 1 for x in k]
    if p is not None:
        return p
    if scalar_kernel:
        return k // 2
    return [x // 2 for x in k]
class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        batch = x.shape[0]
        return x.view(batch, -1)
class ChannelAttention(nn.Module):
    """Channel branch: global average pool followed by an MLP, broadcast back to the input shape.

    The MLP has ``num_layers`` hidden Linear+BatchNorm1d+ReLU stages of width
    ``channel // reduction`` and a final Linear back to ``channel``.
    """

    def __init__(self, channel, reduction=16, num_layers=3):
        super().__init__()
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        hidden = channel // reduction
        widths = [channel] + [hidden] * num_layers + [channel]
        self.ca = nn.Sequential()
        self.ca.add_module('flatten', Flatten())
        # One fc/bn/relu triple per hidden layer; the last width is handled below.
        for idx, (n_in, n_out) in enumerate(zip(widths[:-2], widths[1:-1])):
            self.ca.add_module('fc%d' % idx, nn.Linear(n_in, n_out))
            self.ca.add_module('bn%d' % idx, nn.BatchNorm1d(n_out))
            self.ca.add_module('relu%d' % idx, nn.ReLU())
        self.ca.add_module('last_fc', nn.Linear(widths[-2], widths[-1]))

    def forward(self, x):
        pooled = self.avgpool(x)
        logits = self.ca(pooled)
        # Append two singleton spatial axes, then broadcast to x's shape.
        logits = logits.unsqueeze(-1).unsqueeze(-1)
        return logits.expand_as(x)
class SpatialAttention(nn.Module):
    """Spatial branch: 1x1 reduce, ``num_layers`` dilated 3x3 convs, 1x1 to one channel, broadcast to the input shape."""

    def __init__(self, channel, reduction=16, num_layers=3, dia_val=2):
        super().__init__()
        mid = channel // reduction
        layers = nn.Sequential()
        layers.add_module('conv_reduce1',
                          nn.Conv2d(kernel_size=1, in_channels=channel, out_channels=mid))
        layers.add_module('bn_reduce1', nn.BatchNorm2d(mid))
        layers.add_module('relu_reduce1', nn.ReLU())
        for idx in range(num_layers):
            dilated = nn.Conv2d(kernel_size=3, in_channels=mid, out_channels=mid,
                                padding=autopad(3, None, dia_val), dilation=dia_val)
            layers.add_module('conv_%d' % idx, dilated)
            layers.add_module('bn_%d' % idx, nn.BatchNorm2d(mid))
            layers.add_module('relu_%d' % idx, nn.ReLU())
        layers.add_module('last_conv', nn.Conv2d(mid, 1, kernel_size=1))
        self.sa = layers

    def forward(self, x):
        # Single-channel map broadcast over the channels of x.
        return self.sa(x).expand_as(x)
class BAMBlock(nn.Module):
    """Combine the channel and spatial branches into a residual gate: ``(1 + sigmoid(sa + ca)) * x``."""

    def __init__(self, channel=512, reduction=16, dia_val=2):
        super().__init__()
        self.ca = ChannelAttention(channel=channel, reduction=reduction)
        self.sa = SpatialAttention(channel=channel, reduction=reduction, dia_val=dia_val)
        self.sigmoid = nn.Sigmoid()

    def init_weights(self):
        """Re-initialise conv / BatchNorm2d / linear sub-modules in place (not called by ``__init__``)."""
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                init.constant_(layer.weight, 1)
                init.constant_(layer.bias, 0)
                continue
            if isinstance(layer, nn.Conv2d):
                init.kaiming_normal_(layer.weight, mode='fan_out')
            elif isinstance(layer, nn.Linear):
                init.normal_(layer.weight, std=0.001)
            else:
                continue
            # Conv2d and Linear share the same bias handling.
            if layer.bias is not None:
                init.constant_(layer.bias, 0)

    def forward(self, x):
        # Unpacking into four names keeps the requirement that x is 4-D.
        _b, _c, _h, _w = x.size()
        spatial = self.sa(x)
        channel = self.ca(x)
        gate = self.sigmoid(spatial + channel)
        return (1 + gate) * x
if __name__ == '__main__':
    # Smoke test: push a random batch through the block and report the output shape.
    feats = torch.randn(50, 512, 7, 7)
    block = BAMBlock(channel=512, reduction=16, dia_val=2)
    print(block(feats).shape)
================================================
FILE: cv-attention/Biformer.py
================================================
"""
Core of BiFormer, Bi-Level Routing Attention.
To be refactored.
author: ZHU Lei
github: https://github.com/rayleizhu
email: ray.leizhu@outlook.com
This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
from typing import Tuple, Optional
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
from torch import Tensor, LongTensor
class TopkRouting(nn.Module):
    """
    Top-k routing with scaling.

    Args:
        qk_dim: int, feature dimension of query and key
        topk: int, number of keys kept for every query
        qk_scale: float or None, temperature (multiplied into the query before
            the dot product); falls back to ``qk_dim ** -0.5`` when falsy
        param_routing: bool, whether to pass query and key through a shared
            learnable linear layer before computing the routing logits
        diff_routing: bool, whether to keep routing differentiable; when False
            query and key are detached first
    """
    def __init__(self, qk_dim, topk=4, qk_scale=None, param_routing=False, diff_routing=False):
        super().__init__()
        self.topk = topk
        self.qk_dim = qk_dim
        self.scale = qk_scale or qk_dim ** -0.5
        self.diff_routing = diff_routing
        # TODO: norm layer before/after linear?
        self.emb = nn.Linear(qk_dim, qk_dim) if param_routing else nn.Identity()
        # routing activation
        self.routing_act = nn.Softmax(dim=-1)

    def forward(self, query: Tensor, key: Tensor) -> Tuple[Tensor, Tensor]:
        """
        Args:
            query, key: (n, p^2, c) tensor
        Return:
            r_weight, topk_index: (n, p^2, topk) tensor; r_weight is the
            softmax over the selected logits only, topk_index holds the
            positions of the selected keys
        """
        if not self.diff_routing:
            query, key = query.detach(), key.detach()
        query_hat, key_hat = self.emb(query), self.emb(key)  # per-window pooling -> (n, p^2, c)
        attn_logit = (query_hat * self.scale) @ key_hat.transpose(-2, -1)  # (n, p^2, p^2)
        topk_attn_logit, topk_index = torch.topk(attn_logit, k=self.topk, dim=-1)  # (n, p^2, k), (n, p^2, k)
        r_weight = self.routing_act(topk_attn_logit)  # (n, p^2, k)

        return r_weight, topk_index
class KVGather(nn.Module):
    """Pick, for every query window, the key/value windows named by the routing indices.

    ``mul_weight`` is one of 'none' (plain gather), 'soft' (gathered entries are
    scaled by the routing weights) or 'hard' (not implemented).
    """
    def __init__(self, mul_weight='none'):
        super().__init__()
        assert mul_weight in ['none', 'soft', 'hard']
        self.mul_weight = mul_weight

    def forward(self, r_idx:Tensor, r_weight:Tensor, kv:Tensor):
        """
        r_idx: (n, p^2, topk) tensor
        r_weight: (n, p^2, topk) tensor
        kv: (n, p^2, w^2, c_kq+c_v)

        Return:
            (n, p^2, topk, w^2, c_kq+c_v) tensor
        """
        n, p2, w2, c_kv = kv.size()
        topk = r_idx.size(-1)
        # FIXME: gather consumes much memory (topk times redundancy), write cuda kernel?
        # Every query window sees all p^2 candidate windows (expand: no memory copy).
        candidates = kv.view(n, 1, p2, w2, c_kv).expand(-1, p2, -1, -1, -1)  # (n, p^2, p^2, w^2, c_kv)
        # Broadcast each routing index over the pixel and channel axes.
        gather_index = r_idx.view(n, p2, topk, 1, 1).expand(-1, -1, -1, w2, c_kv)  # (n, p^2, k, w^2, c_kv)
        selected = torch.gather(candidates, dim=2, index=gather_index)

        if self.mul_weight == 'hard':
            raise NotImplementedError('differentiable hard routing TBA')
        if self.mul_weight == 'soft':
            selected = r_weight.view(n, p2, topk, 1, 1) * selected  # (n, p^2, k, w^2, c_kv)
        # 'none': return the gathered tensor untouched
        return selected
class QKVLinear(nn.Module):
    """One linear layer producing the query and the concatenated key/value.

    The projection has ``qk_dim + qk_dim + dim`` output features: the first
    ``qk_dim`` are the query, the remaining ``qk_dim + dim`` are key and value
    kept together.
    """
    def __init__(self, dim, qk_dim, bias=True):
        super().__init__()
        self.dim = dim
        self.qk_dim = qk_dim
        self.qkv = nn.Linear(dim, qk_dim + qk_dim + dim, bias=bias)

    def forward(self, x):
        projected = self.qkv(x)
        # Split along the last axis into [query | key+value].
        sizes = [self.qk_dim, self.qk_dim + self.dim]
        q, kv = torch.split(projected, sizes, dim=-1)
        return q, kv
class BiLevelRoutingAttention(nn.Module):
    """
    Bi-level routing attention: window-level top-k routing followed by
    pixel-level attention restricted to the routed windows.

    Args:
        dim: number of input/output channels (also the value dimension)
        n_win: number of windows in one side (so the actual number of windows is n_win*n_win)
        num_heads: number of attention heads; must divide both qk_dim and dim
        qk_dim: query/key channels, defaults to dim
        qk_scale: multiplier applied to the queries, defaults to qk_dim ** -0.5
        kv_per_win: for kv_downsample_mode='ada_xxxpool' only, number of key/values per window. Similar to n_win, the actual number is kv_per_win*kv_per_win.
        kv_downsample_ratio: for kv_downsample_mode='maxpool'/'avgpool' only, the pooling kernel size
        kv_downsample_kernel: stored on the module; not used by any implemented mode
        kv_downsample_mode: 'identity', 'ada_avgpool', 'ada_maxpool', 'maxpool' or 'avgpool';
            'fracpool' and 'conv' raise NotImplementedError, anything else raises ValueError
        topk: topk for window filtering
        param_attention: 'qkvo' - linear for q,k,v and o; 'qkv' - linear for q,k,v only;
            anything else raises ValueError
        param_routing: extra linear for routing
        diff_routing: whether to set routing differentiable
        soft_routing: whether to multiply soft routing weights
        side_dwconv: kernel size of the depth-wise conv applied to v (lepe); when not
            positive the lepe term is all zeros
        auto_pad: pad H and W up to a multiple of n_win in forward and crop the result back
    """
    def __init__(self, dim, n_win=7, num_heads=8, qk_dim=None, qk_scale=None,
                 kv_per_win=4, kv_downsample_ratio=4, kv_downsample_kernel=None, kv_downsample_mode='identity',
                 topk=4, param_attention="qkvo", param_routing=False, diff_routing=False, soft_routing=False, side_dwconv=3,
                 auto_pad=True):
        super().__init__()
        # local attention setting
        self.dim = dim
        self.n_win = n_win  # Wh, Ww
        self.num_heads = num_heads
        self.qk_dim = qk_dim or dim
        assert self.qk_dim % num_heads == 0 and self.dim % num_heads==0, 'qk_dim and dim must be divisible by num_heads!'
        self.scale = qk_scale or self.qk_dim ** -0.5

        ################side_dwconv (i.e. LCE in ShuntedTransformer)###########
        self.lepe = nn.Conv2d(dim, dim, kernel_size=side_dwconv, stride=1, padding=side_dwconv//2, groups=dim) if side_dwconv > 0 else \
                    lambda x: torch.zeros_like(x)

        ################ global routing setting #################
        self.topk = topk
        self.param_routing = param_routing
        self.diff_routing = diff_routing
        self.soft_routing = soft_routing
        # router
        assert not (self.param_routing and not self.diff_routing)  # cannot be with_param=True and diff_routing=False
        self.router = TopkRouting(qk_dim=self.qk_dim,
                                  qk_scale=self.scale,
                                  topk=self.topk,
                                  diff_routing=self.diff_routing,
                                  param_routing=self.param_routing)
        if self.soft_routing:  # soft routing, always differentiable (if no detach)
            mul_weight = 'soft'
        elif self.diff_routing:  # hard differentiable routing
            mul_weight = 'hard'
        else:  # hard non-differentiable routing
            mul_weight = 'none'
        self.kv_gather = KVGather(mul_weight=mul_weight)

        # qkv mapping (shared by both global routing and local attention)
        self.param_attention = param_attention
        if self.param_attention == 'qkvo':
            self.qkv = QKVLinear(self.dim, self.qk_dim)
            self.wo = nn.Linear(dim, dim)
        elif self.param_attention == 'qkv':
            self.qkv = QKVLinear(self.dim, self.qk_dim)
            self.wo = nn.Identity()
        else:
            raise ValueError(f'param_attention mode {self.param_attention} is not surpported!')

        self.kv_downsample_mode = kv_downsample_mode
        self.kv_per_win = kv_per_win
        self.kv_downsample_ratio = kv_downsample_ratio
        self.kv_downsample_kenel = kv_downsample_kernel
        if self.kv_downsample_mode == 'ada_avgpool':
            assert self.kv_per_win is not None
            self.kv_down = nn.AdaptiveAvgPool2d(self.kv_per_win)
        elif self.kv_downsample_mode == 'ada_maxpool':
            assert self.kv_per_win is not None
            self.kv_down = nn.AdaptiveMaxPool2d(self.kv_per_win)
        elif self.kv_downsample_mode == 'maxpool':
            assert self.kv_downsample_ratio is not None
            self.kv_down = nn.MaxPool2d(self.kv_downsample_ratio) if self.kv_downsample_ratio > 1 else nn.Identity()
        elif self.kv_downsample_mode == 'avgpool':
            assert self.kv_downsample_ratio is not None
            self.kv_down = nn.AvgPool2d(self.kv_downsample_ratio) if self.kv_downsample_ratio > 1 else nn.Identity()
        elif self.kv_downsample_mode == 'identity':  # no kv downsampling
            self.kv_down = nn.Identity()
        elif self.kv_downsample_mode == 'fracpool':
            # assert self.kv_downsample_ratio is not None
            # assert self.kv_downsample_kenel is not None
            # TODO: fracpool
            # 1. kernel size should be input size dependent
            # 2. there is a random factor, need to avoid independent sampling for k and v
            raise NotImplementedError('fracpool policy is not implemented yet!')
        elif kv_downsample_mode == 'conv':
            # TODO: need to consider the case where k != v so that need two downsample modules
            raise NotImplementedError('conv policy is not implemented yet!')
        else:
            # The attribute name must match the one assigned above, otherwise
            # this branch raises AttributeError instead of the intended ValueError.
            raise ValueError(f'kv_down_sample_mode {self.kv_downsample_mode} is not surpported!')

        # softmax for local attention
        self.attn_act = nn.Softmax(dim=-1)

        self.auto_pad=auto_pad

    def forward(self, x, ret_attn_mask=False):
        """
        x: NCHW tensor (converted to NHWC internally)

        Return:
            NCHW tensor when ret_attn_mask is False.
            When ret_attn_mask is True: (out, r_weight, r_idx, attn_weight),
            where out is left in NHWC layout.
        """
        x = rearrange(x, "n c h w -> n h w c")
        # NOTE: use padding for semantic segmentation
        ###################################################
        if self.auto_pad:
            N, H_in, W_in, C = x.size()

            # pad only on the right/bottom so that H and W become multiples of n_win
            pad_l = pad_t = 0
            pad_r = (self.n_win - W_in % self.n_win) % self.n_win
            pad_b = (self.n_win - H_in % self.n_win) % self.n_win
            x = F.pad(x, (0, 0,  # dim=-1
                          pad_l, pad_r,  # dim=-2
                          pad_t, pad_b))  # dim=-3
            _, H, W, _ = x.size()  # padded size
        else:
            N, H, W, C = x.size()
            assert H%self.n_win == 0 and W%self.n_win == 0 #
        ###################################################

        # patchify, (n, p^2, w, w, c), keep 2d window as we need 2d pooling to reduce kv size
        x = rearrange(x, "n (j h) (i w) c -> n (j i) h w c", j=self.n_win, i=self.n_win)

        #################qkv projection###################
        # q: (n, p^2, w, w, c_qk)
        # kv: (n, p^2, w, w, c_qk+c_v)
        # NOTE: separate kv if there were memory leak issue caused by gather
        q, kv = self.qkv(x)

        # pixel-wise qkv
        # q_pix: (n, p^2, w^2, c_qk)
        # kv_pix: (n, p^2, h_kv*w_kv, c_qk+c_v)
        q_pix = rearrange(q, 'n p2 h w c -> n p2 (h w) c')
        kv_pix = self.kv_down(rearrange(kv, 'n p2 h w c -> (n p2) c h w'))
        kv_pix = rearrange(kv_pix, '(n j i) c h w -> n (j i) (h w) c', j=self.n_win, i=self.n_win)

        q_win, k_win = q.mean([2, 3]), kv[..., 0:self.qk_dim].mean([2, 3])  # window-wise qk, (n, p^2, c_qk), (n, p^2, c_qk)

        ##################side_dwconv(lepe)##################
        # NOTE: call contiguous to avoid gradient warning when using ddp
        lepe = self.lepe(rearrange(kv[..., self.qk_dim:], 'n (j i) h w c -> n c (j h) (i w)', j=self.n_win, i=self.n_win).contiguous())
        lepe = rearrange(lepe, 'n c (j h) (i w) -> n (j h) (i w) c', j=self.n_win, i=self.n_win)

        ############ gather q dependent k/v #################
        r_weight, r_idx = self.router(q_win, k_win)  # both are (n, p^2, topk) tensors

        kv_pix_sel = self.kv_gather(r_idx=r_idx, r_weight=r_weight, kv=kv_pix)  # (n, p^2, topk, h_kv*w_kv, c_qk+c_v)
        k_pix_sel, v_pix_sel = kv_pix_sel.split([self.qk_dim, self.dim], dim=-1)
        # k_pix_sel: (n, p^2, topk, h_kv*w_kv, c_qk)
        # v_pix_sel: (n, p^2, topk, h_kv*w_kv, c_v)

        ######### do attention as normal ####################
        # keys are laid out already transposed: (n*p^2, m, c_qk//m, topk*h_kv*w_kv)
        k_pix_sel = rearrange(k_pix_sel, 'n p2 k w2 (m c) -> (n p2) m c (k w2)', m=self.num_heads)
        v_pix_sel = rearrange(v_pix_sel, 'n p2 k w2 (m c) -> (n p2) m (k w2) c', m=self.num_heads)  # flatten to BMLC, (n*p^2, m, topk*h_kv*w_kv, c_v//m)
        q_pix = rearrange(q_pix, 'n p2 w2 (m c) -> (n p2) m w2 c', m=self.num_heads)  # to BMLC tensor (n*p^2, m, w^2, c_qk//m)

        # param-free multihead attention
        attn_weight = (q_pix * self.scale) @ k_pix_sel  # (n*p^2, m, w^2, c) @ (n*p^2, m, c, topk*h_kv*w_kv) -> (n*p^2, m, w^2, topk*h_kv*w_kv)
        attn_weight = self.attn_act(attn_weight)
        out = attn_weight @ v_pix_sel  # (n*p^2, m, w^2, topk*h_kv*w_kv) @ (n*p^2, m, topk*h_kv*w_kv, c) -> (n*p^2, m, w^2, c)
        out = rearrange(out, '(n j i) m (h w) c -> n (j h) (i w) (m c)', j=self.n_win, i=self.n_win,
                        h=H//self.n_win, w=W//self.n_win)

        out = out + lepe
        # output linear
        out = self.wo(out)

        # NOTE: use padding for semantic segmentation
        # crop padded region
        if self.auto_pad and (pad_r > 0 or pad_b > 0):
            out = out[:, :H_in, :W_in, :].contiguous()

        if ret_attn_mask:
            # NOTE(review): out is still NHWC on this path, unlike the default return below.
            return out, r_weight, r_idx, attn_weight
        else:
            return rearrange(out, "n h w c -> n c h w")
class Attention(nn.Module):
    """
    Plain multi-head self-attention over all spatial positions of a feature map.
    """
    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        per_head = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = qk_scale or per_head ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        """
        args:
            x: NCHW tensor
        return:
            NCHW tensor
        """
        _, _, height, width = x.size()
        tokens = rearrange(x, 'n c h w -> n (h w) c')

        B, N, C = tokens.shape
        # (B, N, 3*C) -> (3, B, heads, N, C // heads)
        qkv = self.qkv(tokens)
        qkv = qkv.reshape(B, N, 3, self.num_heads, C // self.num_heads)
        qkv = qkv.permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]   # make torchscript happy (cannot use tensor as tuple)

        scores = (q @ k.transpose(-2, -1)) * self.scale
        scores = torch.softmax(scores, dim=-1)
        scores = self.attn_drop(scores)

        # merge the heads back into the channel axis
        merged = (scores @ v).transpose(1, 2).reshape(B, N, C)
        merged = self.proj_drop(self.proj(merged))

        return rearrange(merged, 'n (h w) c -> n c h w', h=height, w=width)
class AttentionLePE(nn.Module):
    """
    Plain multi-head self-attention plus a depth-wise conv term (lepe) added before the output projection.
    """
    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., side_dwconv=5):
        super().__init__()
        self.num_heads = num_heads
        per_head = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = qk_scale or per_head ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)
        if side_dwconv > 0:
            # depth-wise conv (groups=dim) with 'same'-style padding
            self.lepe = nn.Conv2d(dim, dim, kernel_size=side_dwconv, stride=1,
                                  padding=side_dwconv//2, groups=dim)
        else:
            # no conv requested: the lepe term contributes zeros
            self.lepe = lambda x: torch.zeros_like(x)

    def forward(self, x):
        """
        args:
            x: NCHW tensor
        return:
            NCHW tensor
        """
        _, _, height, width = x.size()
        tokens = rearrange(x, 'n c h w -> n (h w) c')

        B, N, C = tokens.shape
        # (B, N, 3*C) -> (3, B, heads, N, C // heads)
        qkv = self.qkv(tokens)
        qkv = qkv.reshape(B, N, 3, self.num_heads, C // self.num_heads)
        qkv = qkv.permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]   # make torchscript happy (cannot use tensor as tuple)

        # lepe is computed from the input tokens reshaped back to a grid
        lepe = self.lepe(rearrange(tokens, 'n (h w) c -> n c h w', h=height, w=width))
        lepe = rearrange(lepe, 'n c h w -> n (h w) c')

        scores = (q @ k.transpose(-2, -1)) * self.scale
        scores = torch.softmax(scores, dim=-1)
        scores = self.attn_drop(scores)

        # merge the heads back into the channel axis, then add the lepe term
        merged = (scores @ v).transpose(1, 2).reshape(B, N, C)
        merged = merged + lepe

        merged = self.proj_drop(self.proj(merged))

        return rearrange(merged, 'n (h w) c -> n c h w', h=height, w=width)
def _grid2seq(x:Tensor, region_size:Tuple[int], num_heads:int):
    """
    Cut a feature map into non-overlapping regions and lay each region out as a token sequence.

    Args:
        x: BCHW tensor
        region_size: (region height, region width)
        num_heads: number of attention heads
    Return:
        out: rearranged x, has a shape of (bs, nhead, nregion, reg_size, head_dim)
        region_h, region_w: number of regions per col/row
    """
    batch, channels, height, width = x.size()
    reg_h, reg_w = region_size[0], region_size[1]
    region_h = height // reg_h
    region_w = width // reg_w
    head_dim = channels // num_heads
    # split channels into heads and both spatial axes into (region index, offset inside region)
    grid = x.view(batch, num_heads, head_dim, region_h, reg_h, region_w, reg_w)
    seq = torch.einsum('bmdhpwq->bmhwpqd', grid)
    seq = seq.flatten(2, 3)    # merge the two region-index axes -> nregion
    seq = seq.flatten(-3, -2)  # merge the two in-region offsets -> reg_size
    return seq, region_h, region_w
def _seq2grid(x:Tensor, region_h:int, region_w:int, region_size:Tuple[int]):
"""
Args:
x: (bs, nhead, nregion, reg_size^2, head_dim)
Return:
x: (bs, C, H, W)
"""
bs, nhead, nregion, reg_size_square, head_dim = x.size()
x = x.view(bs, nhead, region_h, region_w, region_size[0], region_size[1], head_dim)
x = torch.einsum('bmhwpqd->bmdhpwq', x).reshape(bs, nhead*head_dim,
region_h*region_size[0], region_w*region_size[1])
return x
def regional_routing_attention_torch(
        query:Tensor, key:Tensor, value:Tensor, scale:float,
        region_graph:LongTensor, region_size:Tuple[int],
        kv_region_size:Optional[Tuple[int]]=None,
        auto_pad=True)->Tensor:
    """Token-to-token attention restricted to the routed key/value regions.

    Args:
        query, key, value: (B, C, H, W) tensor
        scale: the scale/temperature for dot product attention
        region_graph: (B, nhead, h_q*w_q, topk) tensor, topk <= h_k*w_k
        region_size: region/window size for queries, (rh, rw)
        kv_region_size: optional, if None, kv_region_size=region_size
        auto_pad: required to be true if the input sizes are not divisible by the region_size
    Return:
        output: (B, C, H, W) tensor
        attn: (bs, nhead, q_nregion, reg_size, topk*kv_region_size) attention matrix
    """
    kv_region_size = kv_region_size or region_size
    bs, nhead, q_nregion, topk = region_graph.size()

    # Zero-pad bottom/right so every spatial size is a multiple of its region size
    q_pad_b = q_pad_r = 0
    if auto_pad:
        _, _, Hq, Wq = query.size()
        q_pad_b = -Hq % region_size[0]
        q_pad_r = -Wq % region_size[1]
        if q_pad_b > 0 or q_pad_r > 0:
            query = F.pad(query, (0, q_pad_r, 0, q_pad_b))

        _, _, Hk, Wk = key.size()
        kv_pad_b = -Hk % kv_region_size[0]
        kv_pad_r = -Wk % kv_region_size[1]
        if kv_pad_r > 0 or kv_pad_b > 0:
            key = F.pad(key, (0, kv_pad_r, 0, kv_pad_b))
            value = F.pad(value, (0, kv_pad_r, 0, kv_pad_b))

    # to sequence format, i.e. (bs, nhead, nregion, reg_size, head_dim)
    query, q_region_h, q_region_w = _grid2seq(query, region_size=region_size, num_heads=nhead)
    key, _, _ = _grid2seq(key, region_size=kv_region_size, num_heads=nhead)
    value, _, _ = _grid2seq(value, region_size=kv_region_size, num_heads=nhead)

    # torch.gather does not broadcast, so both the index and the source are
    # expanded by hand to (bs, nhead, q_nregion, *, kv_tokens, head_dim)
    # TODO: is seperate gathering slower than fused one (our old version) ?
    bs, nhead, kv_nregion, kv_tokens, head_dim = key.size()
    gather_index = region_graph.view(bs, nhead, q_nregion, topk, 1, 1)
    gather_index = gather_index.expand(-1, -1, -1, -1, kv_tokens, head_dim)

    def _pick_regions(seq):
        # -> (bs, nhead, q_nregion, topk, kv_tokens, head_dim)
        per_query = seq.view(bs, nhead, 1, kv_nregion, kv_tokens, head_dim)
        per_query = per_query.expand(-1, -1, query.size(2), -1, -1, -1)
        return torch.gather(per_query, dim=3, index=gather_index)

    key_g = _pick_regions(key)
    value_g = _pick_regions(value)

    # token-to-token attention
    # (bs, nhead, q_nregion, reg_size, head_dim) @ (bs, nhead, q_nregion, head_dim, topk*kv_tokens)
    # -> (bs, nhead, q_nregion, reg_size, topk*kv_tokens)
    # TODO: mask padding region
    attn = (query * scale) @ key_g.flatten(-3, -2).transpose(-1, -2)
    attn = torch.softmax(attn, dim=-1)

    # (bs, nhead, q_nregion, reg_size, topk*kv_tokens) @ (bs, nhead, q_nregion, topk*kv_tokens, head_dim)
    # -> (bs, nhead, q_nregion, reg_size, head_dim)
    output = attn @ value_g.flatten(-3, -2)

    # to BCHW format
    output = _seq2grid(output, region_h=q_region_h, region_w=q_region_w, region_size=region_size)

    # remove paddings if needed
    if auto_pad and (q_pad_b > 0 or q_pad_r > 0):
        output = output[:, :, :Hq, :Wq]
    return output, attn
class BiLevelRoutingAttention_nchw(nn.Module):
    """Bi-Level Routing Attention that takes nchw input

    Compared to legacy version, this implementation:
    * removes unused args and components
    * uses nchw input format to avoid frequent permutation

    When the size of inputs is not divisible by the region size, there is also a numerical difference
    than legacy implementation, due to:
    * different way to pad the input feature map (padding after linear projection)
    * different pooling behavior (count_include_pad=False)

    Current implementation is more reasonable, hence we do not keep backward numerical compatiability
    """
    def __init__(self, dim, num_heads=8, n_win=7, qk_scale=None, topk=4, side_dwconv=3, auto_pad=False, attn_backend='torch'):
        """
        Args:
            dim: number of input/output channels; must be divisible by ``num_heads``.
            num_heads: number of attention heads.
            n_win: number of windows (regions) per row/col.
            qk_scale: optional logit scale; a falsy value selects ``dim ** -0.5``.
            topk: number of key/value regions each query region attends to.
            side_dwconv: kernel size of the depth-wise conv applied to ``v``;
                a value <= 0 replaces it with a function returning zeros.
            auto_pad: kept for interface compatibility; this class does not read it
                (the attention function is called with its own default).
            attn_backend: only ``'torch'`` is supported.
        Raises:
            ValueError: if ``attn_backend`` is not ``'torch'``.
        """
        super().__init__()
        # local attention setting
        self.dim = dim
        self.num_heads = num_heads
        assert self.dim % num_heads == 0, 'dim must be divisible by num_heads!'
        self.head_dim = self.dim // self.num_heads
        self.scale = qk_scale or self.dim ** -0.5  # NOTE: to be consistent with old models.

        ################side_dwconv (i.e. LCE in Shunted Transformer)###########
        if side_dwconv > 0:
            self.lepe = nn.Conv2d(dim, dim, kernel_size=side_dwconv, stride=1,
                                  padding=side_dwconv // 2, groups=dim)
        else:
            self.lepe = lambda x: torch.zeros_like(x)

        ################ regional routing setting #################
        self.topk = topk
        self.n_win = n_win  # number of windows per row/col

        ##########################################
        self.qkv_linear = nn.Conv2d(self.dim, 3 * self.dim, kernel_size=1)
        self.output_linear = nn.Conv2d(self.dim, self.dim, kernel_size=1)

        if attn_backend == 'torch':
            self.attn_fn = regional_routing_attention_torch
        else:
            raise ValueError('CUDA implementation is not available yet. Please stay tuned.')

    def forward(self, x:Tensor, ret_attn_mask=False):
        """
        Args:
            x: NCHW tensor, better to be channel_last (https://pytorch.org/tutorials/intermediate/memory_format_tutorial.html)
            ret_attn_mask: when true, also return the attention matrix.
        Return:
            NCHW tensor, or ``(NCHW tensor, attention matrix)`` if ``ret_attn_mask``
        """
        _, _, H, W = x.size()
        region_size = (H // self.n_win, W // self.n_win)

        # STEP 1: linear projection
        # Call the module (not .forward) so registered forward hooks are honoured.
        qkv = self.qkv_linear(x)  # ncHW
        q, k, v = qkv.chunk(3, dim=1)  # ncHW

        # STEP 2: region-to-region routing
        # NOTE: ceil_mode=True, count_include_pad=False = auto padding
        # NOTE: gradients backward through token-to-token attention. See Appendix A for the intuition.
        q_r = F.avg_pool2d(q.detach(), kernel_size=region_size, ceil_mode=True, count_include_pad=False)
        k_r = F.avg_pool2d(k.detach(), kernel_size=region_size, ceil_mode=True, count_include_pad=False)  # nchw
        q_r:Tensor = q_r.permute(0, 2, 3, 1).flatten(1, 2)  # n(hw)c
        k_r:Tensor = k_r.flatten(2, 3)  # nc(hw)
        a_r = q_r @ k_r  # n(hw)(hw), adj matrix of regional graph
        _, idx_r = torch.topk(a_r, k=self.topk, dim=-1)  # n(hw)k long tensor
        # the same routing indices are shared by every head
        idx_r:LongTensor = idx_r.unsqueeze_(1).expand(-1, self.num_heads, -1, -1)

        # STEP 3: token to token attention (non-parametric function)
        output, attn_mat = self.attn_fn(query=q, key=k, value=v, scale=self.scale,
                                        region_graph=idx_r, region_size=region_size)

        output = output + self.lepe(v)  # ncHW
        output = self.output_linear(output)  # ncHW

        if ret_attn_mask:
            return output, attn_mat
        return output
================================================
FILE: cv-attention/CAA.py
================================================
import torch.nn as nn
def autopad(k, p=None, d=1):  # kernel, padding, dilation
    """Return the padding that yields 'same' shape outputs.

    ``k`` may be an int or a sequence of ints. An explicit ``p`` is returned
    unchanged; otherwise the padding is half the (dilated) kernel size.
    """
    scalar_kernel = isinstance(k, int)
    if d > 1:
        # effective kernel size once dilation is taken into account
        k = d * (k - 1) + 1 if scalar_kernel else [d * (side - 1) + 1 for side in k]
    if p is not None:
        return p
    return k // 2 if scalar_kernel else [side // 2 for side in k]
class Conv(nn.Module):
    """Conv2d + BatchNorm2d + activation, with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""

    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """Create the bias-free conv, its batch norm and the activation.

        ``act=True`` selects ``default_act``; an ``nn.Module`` is used as given;
        anything else disables the activation (identity).
        """
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run convolution, then batch normalization, then the activation."""
        y = self.conv(x)
        y = self.bn(y)
        return self.act(y)

    def forward_fuse(self, x):
        """Run convolution followed by the activation, skipping batch normalization."""
        y = self.conv(x)
        return self.act(y)
class CAA(nn.Module):
    """Attention gate built from average pooling and horizontal/vertical depth-wise strip convs."""

    def __init__(self, ch, h_kernel_size = 11, v_kernel_size = 11) -> None:
        """
        Args:
            ch: number of channels (input == output).
            h_kernel_size: width of the 1 x k horizontal depth-wise conv.
            v_kernel_size: height of the k x 1 vertical depth-wise conv.
        """
        super().__init__()
        self.avg_pool = nn.AvgPool2d(kernel_size=7, stride=1, padding=3)
        self.conv1 = Conv(ch, ch)
        # depth-wise (groups == ch) strip convolutions with "same" padding
        self.h_conv = nn.Conv2d(ch, ch, kernel_size=(1, h_kernel_size), stride=1,
                                padding=(0, h_kernel_size // 2), dilation=1, groups=ch)
        self.v_conv = nn.Conv2d(ch, ch, kernel_size=(v_kernel_size, 1), stride=1,
                                padding=(v_kernel_size // 2, 0), dilation=1, groups=ch)
        self.conv2 = Conv(ch, ch)
        self.act = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled element-wise by the sigmoid attention factor."""
        feat = self.avg_pool(x)
        feat = self.conv1(feat)
        feat = self.h_conv(feat)
        feat = self.v_conv(feat)
        feat = self.conv2(feat)
        attn_factor = self.act(feat)
        return attn_factor * x
================================================
FILE: cv-attention/CBAM.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
class ChannelAttention(nn.Module):
    """Channel gate: a shared bottleneck MLP over global max- and avg-pooled features."""

    def __init__(self, channel, reduction=16):
        """
        Args:
            channel: number of input channels.
            reduction: bottleneck ratio; the hidden width is ``channel // reduction``.
        """
        super().__init__()
        self.maxpool = nn.AdaptiveMaxPool2d(1)
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        hidden = channel // reduction
        self.se = nn.Sequential(
            nn.Conv2d(channel, hidden, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden, channel, 1, bias=False),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the (b, c, 1, 1) sigmoid gate; the input is not rescaled here."""
        from_max = self.se(self.maxpool(x))
        from_avg = self.se(self.avgpool(x))
        return self.sigmoid(from_max + from_avg)
class SpatialAttention(nn.Module):
    """Spatial gate: a conv over the channel-wise max and mean maps, then a sigmoid."""

    def __init__(self, kernel_size=7):
        """
        Args:
            kernel_size: kernel size of the 2 -> 1 channel conv (padded by ``kernel_size // 2``).
        """
        super().__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=kernel_size, padding=kernel_size // 2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the single-channel sigmoid gate computed from ``x``."""
        channel_max = x.max(dim=1, keepdim=True).values
        channel_avg = x.mean(dim=1, keepdim=True)
        stacked = torch.cat([channel_max, channel_avg], 1)
        return self.sigmoid(self.conv(stacked))
class CBAMBlock(nn.Module):
    """Channel attention followed by spatial attention, each applied multiplicatively."""

    def __init__(self, channel=512, reduction=16, kernel_size=7):
        """
        Args:
            channel: number of input channels.
            reduction: bottleneck ratio passed to the channel attention.
            kernel_size: conv kernel size passed to the spatial attention.
        """
        super().__init__()
        self.ca = ChannelAttention(channel=channel, reduction=reduction)
        self.sa = SpatialAttention(kernel_size=kernel_size)

    def init_weights(self):
        """Re-initialise Conv2d / BatchNorm2d / Linear sub-modules.

        This method is not invoked by ``__init__``; it only runs when called explicitly.
        """
        def _zero_bias(layer):
            if layer.bias is not None:
                init.constant_(layer.bias, 0)

        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                init.kaiming_normal_(layer.weight, mode='fan_out')
                _zero_bias(layer)
            elif isinstance(layer, nn.BatchNorm2d):
                init.constant_(layer.weight, 1)
                init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                init.normal_(layer.weight, std=0.001)
                _zero_bias(layer)

    def forward(self, x):
        """Return ``x`` re-weighted by the channel gate and then by the spatial gate."""
        # the 4-way unpack only serves as a rank check on the input
        _, _, _, _ = x.size()
        channel_refined = x * self.ca(x)
        return channel_refined * self.sa(channel_refined)
if __name__ == '__main__':
    # Smoke test: the output shape should equal the input shape.
    # (named `x` rather than `input` to avoid shadowing the builtin)
    x = torch.randn(50, 512, 7, 7)
    kernel_size = x.shape[2]
    cbam = CBAMBlock(channel=512, reduction=16, kernel_size=kernel_size)
    output = cbam(x)
    print(output.shape)
================================================
FILE: cv-attention/CPCA.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class CPCA_ChannelAttention(nn.Module):
    """Channel attention: a shared two-layer 1x1-conv MLP over avg- and max-pooled features."""

    def __init__(self, input_channels, internal_neurons):
        """
        Args:
            input_channels: number of channels of the input feature map.
            internal_neurons: hidden width of the bottleneck.
        """
        super(CPCA_ChannelAttention, self).__init__()
        self.fc1 = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons,
                             kernel_size=1, stride=1, bias=True)
        self.fc2 = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels,
                             kernel_size=1, stride=1, bias=True)
        self.input_channels = input_channels

    def _gate(self, pooled):
        """fc1 -> ReLU -> fc2 -> sigmoid on a (b, c, 1, 1) descriptor."""
        hidden = F.relu(self.fc1(pooled), inplace=True)
        return torch.sigmoid(self.fc2(hidden))

    def forward(self, inputs):
        """Return ``inputs`` scaled by the sum of the avg-pool gate and the max-pool gate."""
        avg_gate = self._gate(F.adaptive_avg_pool2d(inputs, output_size=(1, 1)))
        max_gate = self._gate(F.adaptive_max_pool2d(inputs, output_size=(1, 1)))
        weights = (avg_gate + max_gate).view(-1, self.input_channels, 1, 1)
        return inputs * weights
class CPCA(nn.Module):
    """Channel attention followed by a multi-scale depth-wise strip-conv spatial attention."""

    def __init__(self, channels, channelAttention_reduce=4):
        """
        Args:
            channels: number of channels (input == output).
            channelAttention_reduce: bottleneck ratio of the channel attention.
        """
        super().__init__()

        def _depthwise(kernel, pad):
            # depth-wise conv: one filter per channel
            return nn.Conv2d(channels, channels, kernel_size=kernel, padding=pad, groups=channels)

        self.ca = CPCA_ChannelAttention(input_channels=channels,
                                        internal_neurons=channels // channelAttention_reduce)
        self.dconv5_5 = _depthwise(5, 2)
        self.dconv1_7 = _depthwise((1, 7), (0, 3))
        self.dconv7_1 = _depthwise((7, 1), (3, 0))
        self.dconv1_11 = _depthwise((1, 11), (0, 5))
        self.dconv11_1 = _depthwise((11, 1), (5, 0))
        self.dconv1_21 = _depthwise((1, 21), (0, 10))
        self.dconv21_1 = _depthwise((21, 1), (10, 0))
        # NOTE: this single 1x1 conv is applied three times in forward (shared weights)
        self.conv = nn.Conv2d(channels, channels, kernel_size=(1, 1), padding=0)
        self.act = nn.GELU()

    def forward(self, inputs):
        """Return the attention-refined feature map (same shape as ``inputs``)."""
        # Global Perceptron
        inputs = self.ca(self.act(self.conv(inputs)))

        base = self.dconv5_5(inputs)
        branch_7 = self.dconv7_1(self.dconv1_7(base))
        branch_11 = self.dconv11_1(self.dconv1_11(base))
        branch_21 = self.dconv21_1(self.dconv1_21(base))
        mixed = branch_7 + branch_11 + branch_21 + base

        spatial_att = self.conv(mixed)
        return self.conv(spatial_att * inputs)
================================================
FILE: cv-attention/CloAttention.py
================================================
import torch
import torch.nn as nn
from efficientnet_pytorch.model import MemoryEfficientSwish
class AttnMap(nn.Module):
    """1x1 conv -> swish -> 1x1 conv block that keeps the channel count."""

    def __init__(self, dim):
        """
        Args:
            dim: number of input and output channels.
        """
        super().__init__()
        layers = [
            nn.Conv2d(dim, dim, 1, 1, 0),
            MemoryEfficientSwish(),
            nn.Conv2d(dim, dim, 1, 1, 0),
        ]
        self.act_block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block to ``x``."""
        out = self.act_block(x)
        return out
class EfficientAttention(nn.Module):
    """Attention mixing local (conv-gated) head groups with one global (pooled k/v) head group.

    ``group_split[:-1]`` gives the number of heads of each local branch (one per
    entry of ``kernel_sizes``); ``group_split[-1]`` is the number of heads of the
    global branch. A group size of 0 disables that branch.
    """

    def __init__(self, dim, num_heads=8, group_split=[4, 4], kernel_sizes=[5], window_size=4,
                 attn_drop=0., proj_drop=0., qkv_bias=True):
        """
        Args:
            dim: number of input/output channels.
            num_heads: total number of heads; must equal ``sum(group_split)``.
            group_split: heads per branch (local branches first, global branch last).
            kernel_sizes: depth-wise kernel size of each local branch.
            window_size: average-pooling window applied before the global k/v projection.
            attn_drop: dropout probability on attention weights.
            proj_drop: dropout probability after the output projection.
            qkv_bias: whether the projection convs carry a bias.
        """
        super().__init__()
        assert sum(group_split) == num_heads
        assert len(kernel_sizes) + 1 == len(group_split)
        self.dim = dim
        self.num_heads = num_heads
        self.dim_head = dim // num_heads
        self.scalor = self.dim_head ** -0.5
        # Store copies so the shared mutable default lists are never aliased by an instance.
        self.kernel_sizes = list(kernel_sizes)
        self.window_size = window_size
        self.group_split = list(group_split)
        convs = []
        act_blocks = []
        qkvs = []
        for kernel_size, group_head in zip(kernel_sizes, group_split):
            if group_head == 0:
                # disabled branch: no modules are registered for it
                continue
            branch_channels = self.dim_head * group_head
            convs.append(nn.Conv2d(3 * branch_channels, 3 * branch_channels, kernel_size,
                                   1, kernel_size // 2, groups=3 * branch_channels))
            act_blocks.append(AttnMap(branch_channels))
            qkvs.append(nn.Conv2d(dim, 3 * branch_channels, 1, 1, 0, bias=qkv_bias))
        if group_split[-1] != 0:
            self.global_q = nn.Conv2d(dim, group_split[-1] * self.dim_head, 1, 1, 0, bias=qkv_bias)
            self.global_kv = nn.Conv2d(dim, group_split[-1] * self.dim_head * 2, 1, 1, 0, bias=qkv_bias)
            self.avgpool = nn.AvgPool2d(window_size, window_size) if window_size != 1 else nn.Identity()

        self.convs = nn.ModuleList(convs)
        self.act_blocks = nn.ModuleList(act_blocks)
        self.qkvs = nn.ModuleList(qkvs)
        self.proj = nn.Conv2d(dim, dim, 1, 1, 0, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj_drop = nn.Dropout(proj_drop)

    def high_fre_attntion(self, x: torch.Tensor, to_qkv: nn.Module, mixer: nn.Module, attn_block: nn.Module):
        '''
        Local branch: element-wise gating of v by tanh(attn_block(q * k) * scale).

        x: (b c h w)
        returns: (b (m d) h w)
        '''
        b, c, h, w = x.size()
        qkv = to_qkv(x)  # (b (3 m d) h w)
        qkv = mixer(qkv).reshape(b, 3, -1, h, w).transpose(0, 1).contiguous()  # (3 b (m d) h w)
        q, k, v = qkv  # (b (m d) h w)
        attn = attn_block(q.mul(k)).mul(self.scalor)
        attn = self.attn_drop(torch.tanh(attn))
        res = attn.mul(v)  # (b (m d) h w)
        return res

    def low_fre_attention(self, x: torch.Tensor, to_q: nn.Module, to_kv: nn.Module, avgpool: nn.Module):
        '''
        Global branch: full-resolution queries attend to pooled keys/values.

        x: (b c h w)
        returns: (b (m d) h w)
        '''
        b, c, h, w = x.size()

        q = to_q(x).reshape(b, -1, self.dim_head, h * w).transpose(-1, -2).contiguous()  # (b m (h w) d)
        kv = avgpool(x)  # (b c H W)
        # Take the token count from the pooled map itself: the pooling floors each
        # axis separately, so (h*w)//window_size**2 is wrong when h or w is not a
        # multiple of the window size.
        n_kv = kv.shape[-2] * kv.shape[-1]
        kv = to_kv(kv).view(b, 2, -1, self.dim_head, n_kv).permute(1, 0, 2, 4, 3).contiguous()  # (2 b m (H W) d)
        k, v = kv  # (b m (H W) d)
        attn = self.scalor * q @ k.transpose(-1, -2)  # (b m (h w) (H W))
        attn = self.attn_drop(attn.softmax(dim=-1))
        res = attn @ v  # (b m (h w) d)
        res = res.transpose(2, 3).reshape(b, -1, h, w).contiguous()
        return res

    def forward(self, x: torch.Tensor):
        '''
        x: (b c h w)
        returns: (b c h w)
        '''
        # The module lists only hold entries for enabled (non-zero) local groups,
        # so iterate them directly instead of indexing by the kernel index.
        res = [
            self.high_fre_attntion(x, to_qkv, mixer, attn_block)
            for to_qkv, mixer, attn_block in zip(self.qkvs, self.convs, self.act_blocks)
        ]
        if self.group_split[-1] != 0:
            res.append(self.low_fre_attention(x, self.global_q, self.global_kv, self.avgpool))
        return self.proj_drop(self.proj(torch.cat(res, dim=1)))
================================================
FILE: cv-attention/CoTAttention.py
================================================
import numpy as np
import torch
from torch import flatten, nn
from torch.nn import init
from torch.nn.modules.activation import ReLU
from torch.nn.modules.batchnorm import BatchNorm2d
from torch.nn import functional as F
class CoTAttention(nn.Module):
    """Contextual-transformer style attention combining a static (conv) key with a dynamic context."""

    def __init__(self, dim=512, kernel_size=3):
        """
        Args:
            dim: number of channels (input == output); the key conv uses 4 groups.
            kernel_size: kernel size of the grouped key-embedding conv.
        """
        super().__init__()
        self.dim = dim
        self.kernel_size = kernel_size

        self.key_embed = nn.Sequential(
            nn.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size // 2, groups=4, bias=False),
            nn.BatchNorm2d(dim),
            nn.ReLU(),
        )
        self.value_embed = nn.Sequential(
            nn.Conv2d(dim, dim, 1, bias=False),
            nn.BatchNorm2d(dim),
        )

        factor = 4
        squeezed = 2 * dim // factor
        self.attention_embed = nn.Sequential(
            nn.Conv2d(2 * dim, squeezed, 1, bias=False),
            nn.BatchNorm2d(squeezed),
            nn.ReLU(),
            nn.Conv2d(squeezed, kernel_size * kernel_size * dim, 1),
        )

    def forward(self, x):
        """Return static key + dynamic context, shape (bs, c, h, w)."""
        bs, c, h, w = x.shape
        static_key = self.key_embed(x)  # bs,c,h,w
        values = self.value_embed(x).view(bs, c, -1)  # bs,c,h*w

        joint = torch.cat([static_key, x], dim=1)  # bs,2c,h,w
        logits = self.attention_embed(joint)  # bs,c*k*k,h,w
        logits = logits.reshape(bs, c, self.kernel_size * self.kernel_size, h, w)
        # average over the k*k axis, then flatten the spatial grid
        logits = logits.mean(2, keepdim=False).view(bs, c, -1)  # bs,c,h*w

        dynamic = (F.softmax(logits, dim=-1) * values).view(bs, c, h, w)
        return static_key + dynamic
if __name__ == '__main__':
    # Smoke test: the output shape should equal the input shape.
    # (named `x` rather than `input` to avoid shadowing the builtin)
    x = torch.randn(50, 512, 7, 7)
    cot = CoTAttention(dim=512, kernel_size=3)
    output = cot(x)
    print(output.shape)
================================================
FILE: cv-attention/CoordAttention.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
class h_sigmoid(nn.Module):
    """Hard sigmoid: ``relu6(x + 3) / 6``."""

    def __init__(self, inplace=True):
        """
        Args:
            inplace: forwarded to ``nn.ReLU6``.
        """
        super(h_sigmoid, self).__init__()
        self.relu = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        """Return the hard-sigmoid of ``x``."""
        shifted = x + 3
        return self.relu(shifted) / 6
class h_swish(nn.Module):
    """Hard swish: ``x * h_sigmoid(x)``."""

    def __init__(self, inplace=True):
        """
        Args:
            inplace: forwarded to the inner ``h_sigmoid``.
        """
        super(h_swish, self).__init__()
        self.sigmoid = h_sigmoid(inplace=inplace)

    def forward(self, x):
        """Return ``x`` multiplied by its hard-sigmoid gate."""
        gate = self.sigmoid(x)
        return x * gate
class CoordAtt(nn.Module):
    """Coordinate attention: separate gates along the height and width axes."""

    def __init__(self, inp, reduction=32):
        """
        Args:
            inp: number of channels (input == output).
            reduction: bottleneck ratio; the hidden width is ``max(8, inp // reduction)``.
        """
        super(CoordAtt, self).__init__()
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))  # keep H, squeeze W
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))  # keep W, squeeze H

        mip = max(8, inp // reduction)

        self.conv1 = nn.Conv2d(inp, mip, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(mip)
        self.act = h_swish()

        self.conv_h = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)
        self.conv_w = nn.Conv2d(mip, inp, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return ``x`` scaled by the width gate and the height gate."""
        identity = x
        _, _, h, w = x.size()

        strip_h = self.pool_h(x)                        # (n, c, h, 1)
        strip_w = self.pool_w(x).permute(0, 1, 3, 2)    # (n, c, w, 1)

        # process both strips with one shared conv/bn/act stack
        joint = torch.cat([strip_h, strip_w], dim=2)
        joint = self.act(self.bn1(self.conv1(joint)))

        strip_h, strip_w = torch.split(joint, [h, w], dim=2)
        strip_w = strip_w.permute(0, 1, 3, 2)           # back to (n, mip, 1, w)

        a_h = self.conv_h(strip_h).sigmoid()
        a_w = self.conv_w(strip_w).sigmoid()

        return identity * a_w * a_h
if __name__ == '__main__':
    # Smoke test: the output shape should equal the input shape.
    # (named `x` rather than `input` to avoid shadowing the builtin)
    x = torch.randn(50, 512, 7, 7)
    pna = CoordAtt(inp=512)
    output = pna(x)
    print(output.shape)
================================================
FILE: cv-attention/DAttention.py
================================================
import torch, einops
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from timm.models.layers import trunc_normal_
class LayerNormProxy(nn.Module):
    """LayerNorm over the channel axis of a ``b c h w`` tensor."""

    def __init__(self, dim):
        """
        Args:
            dim: number of channels that are normalised.
        """
        super().__init__()
        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        """Move channels last, normalise, and move channels back."""
        channels_last = einops.rearrange(x, 'b c h w -> b h w c')
        normed = self.norm(channels_last)
        return einops.rearrange(normed, 'b h w c -> b c h w')
class DAttention(nn.Module):
    """Deformable multi-head attention.

    Queries come from every position of the input; keys/values are sampled from
    the input at reference points shifted by offsets predicted from the queries
    (or taken from an average-pooled map when ``no_off`` is set).
    """
    # Vision Transformer with Deformable Attention CVPR2022
    # fixed_pe=True need adjust 640x640: the fixed table is sized from q_size and
    # is reshaped to the runtime H*W in forward, so q_size must match the input.
    def __init__(
        self, channel, q_size, n_heads=8, n_groups=4,
        attn_drop=0.0, proj_drop=0.0, stride=1,
        offset_range_factor=4, use_pe=True, dwc_pe=True,
        no_off=False, fixed_pe=False, ksize=3, log_cpb=False, kv_size=None
    ):
        """
        Args:
            channel: number of input channels.
            q_size: (height, width) of the query feature map.
            n_heads: number of attention heads.
            n_groups: number of offset groups.
            attn_drop: dropout probability on attention weights.
            proj_drop: dropout probability after the output projection.
            stride: stride of the offset conv (and of the pooling used when ``no_off``).
            offset_range_factor: multiplier of the tanh-bounded offsets; a negative
                value skips the bounding and clamps the sampling positions instead.
            use_pe: enable a positional term (ignored when ``no_off``).
            dwc_pe: positional term is a depth-wise conv of the queries.
            no_off: disable learned offsets.
            fixed_pe: positional term is a fixed learned bias table.
            ksize: kernel size of the offset conv.
            log_cpb: positional term is a log-spaced continuous position bias MLP.
            kv_size: not used by this implementation.
        """
        super().__init__()
        n_head_channels = channel // n_heads
        self.dwc_pe = dwc_pe
        self.n_head_channels = n_head_channels
        self.scale = self.n_head_channels ** -0.5
        self.n_heads = n_heads
        self.q_h, self.q_w = q_size
        # self.kv_h, self.kv_w = kv_size
        self.kv_h, self.kv_w = self.q_h // stride, self.q_w // stride
        self.nc = n_head_channels * n_heads
        self.n_groups = n_groups
        self.n_group_channels = self.nc // self.n_groups
        self.n_group_heads = self.n_heads // self.n_groups
        self.use_pe = use_pe
        self.fixed_pe = fixed_pe
        self.no_off = no_off
        self.offset_range_factor = offset_range_factor
        self.ksize = ksize
        self.log_cpb = log_cpb
        self.stride = stride
        kk = self.ksize
        pad_size = kk // 2 if kk != stride else 0

        # per-group offset predictor: depth-wise conv -> LN -> GELU -> 2-channel (dy, dx) conv
        self.conv_offset = nn.Sequential(
            nn.Conv2d(self.n_group_channels, self.n_group_channels, kk, stride, pad_size, groups=self.n_group_channels),
            LayerNormProxy(self.n_group_channels),
            nn.GELU(),
            nn.Conv2d(self.n_group_channels, 2, 1, 1, 0, bias=False)
        )
        if self.no_off:
            for m in self.conv_offset.parameters():
                m.requires_grad_(False)

        self.proj_q = nn.Conv2d(
            self.nc, self.nc,
            kernel_size=1, stride=1, padding=0
        )
        self.proj_k = nn.Conv2d(
            self.nc, self.nc,
            kernel_size=1, stride=1, padding=0
        )
        self.proj_v = nn.Conv2d(
            self.nc, self.nc,
            kernel_size=1, stride=1, padding=0
        )
        self.proj_out = nn.Conv2d(
            self.nc, self.nc,
            kernel_size=1, stride=1, padding=0
        )

        self.proj_drop = nn.Dropout(proj_drop, inplace=True)
        self.attn_drop = nn.Dropout(attn_drop, inplace=True)

        if self.use_pe and not self.no_off:
            if self.dwc_pe:
                self.rpe_table = nn.Conv2d(
                    self.nc, self.nc, kernel_size=3, stride=1, padding=1, groups=self.nc)
            elif self.fixed_pe:
                self.rpe_table = nn.Parameter(
                    torch.zeros(self.n_heads, self.q_h * self.q_w, self.kv_h * self.kv_w)
                )
                trunc_normal_(self.rpe_table, std=0.01)
            elif self.log_cpb:
                # Borrowed from Swin-V2
                self.rpe_table = nn.Sequential(
                    nn.Linear(2, 32, bias=True),
                    nn.ReLU(inplace=True),
                    nn.Linear(32, self.n_group_heads, bias=False)
                )
            else:
                self.rpe_table = nn.Parameter(
                    torch.zeros(self.n_heads, self.q_h * 2 - 1, self.q_w * 2 - 1)
                )
                trunc_normal_(self.rpe_table, std=0.01)
        else:
            self.rpe_table = None

    @torch.no_grad()
    def _get_ref_points(self, H_key, W_key, B, dtype, device):
        """Return (B * n_groups, H_key, W_key, 2) reference points in (y, x) order.

        Pixel centres (0.5 .. size - 0.5) are divided by (size - 1) and mapped
        with ``* 2 - 1``.
        """
        ref_y, ref_x = torch.meshgrid(
            torch.linspace(0.5, H_key - 0.5, H_key, dtype=dtype, device=device),
            torch.linspace(0.5, W_key - 0.5, W_key, dtype=dtype, device=device),
            indexing='ij'
        )
        ref = torch.stack((ref_y, ref_x), -1)
        ref[..., 1].div_(W_key - 1.0).mul_(2.0).sub_(1.0)
        ref[..., 0].div_(H_key - 1.0).mul_(2.0).sub_(1.0)
        ref = ref[None, ...].expand(B * self.n_groups, -1, -1, -1)  # B * g H W 2

        return ref

    @torch.no_grad()
    def _get_q_grid(self, H, W, B, dtype, device):
        """Return (B * n_groups, H, W, 2) query coordinates in (y, x) order, scaled to [-1, 1]."""
        ref_y, ref_x = torch.meshgrid(
            torch.arange(0, H, dtype=dtype, device=device),
            torch.arange(0, W, dtype=dtype, device=device),
            indexing='ij'
        )
        ref = torch.stack((ref_y, ref_x), -1)
        ref[..., 1].div_(W - 1.0).mul_(2.0).sub_(1.0)
        ref[..., 0].div_(H - 1.0).mul_(2.0).sub_(1.0)
        ref = ref[None, ...].expand(B * self.n_groups, -1, -1, -1)  # B * g H W 2

        return ref

    def forward(self, x):
        """
        Args:
            x: (B, C, H, W) tensor
        Return:
            (B, C, H, W) tensor
        """
        B, C, H, W = x.size()
        dtype, device = x.dtype, x.device

        q = self.proj_q(x)
        q_off = einops.rearrange(q, 'b (g c) h w -> (b g) c h w', g=self.n_groups, c=self.n_group_channels)
        offset = self.conv_offset(q_off).contiguous()  # B * g 2 Hg Wg
        Hk, Wk = offset.size(2), offset.size(3)
        n_sample = Hk * Wk

        if self.offset_range_factor >= 0 and not self.no_off:
            # bound the offsets to a multiple of one sampling-grid cell
            offset_range = torch.tensor([1.0 / (Hk - 1.0), 1.0 / (Wk - 1.0)], device=device).reshape(1, 2, 1, 1)
            offset = offset.tanh().mul(offset_range).mul(self.offset_range_factor)

        offset = einops.rearrange(offset, 'b p h w -> b h w p')
        reference = self._get_ref_points(Hk, Wk, B, dtype, device)

        if self.no_off:
            offset = offset.fill_(0.0)

        if self.offset_range_factor >= 0:
            pos = offset + reference
        else:
            pos = (offset + reference).clamp(-1., +1.)

        if self.no_off:
            x_sampled = F.avg_pool2d(x, kernel_size=self.stride, stride=self.stride)
            assert x_sampled.size(2) == Hk and x_sampled.size(3) == Wk, f"Size is {x_sampled.size()}"
        else:
            pos = pos.type(x.dtype)
            x_sampled = F.grid_sample(
                input=x.reshape(B * self.n_groups, self.n_group_channels, H, W),
                grid=pos[..., (1, 0)],  # y, x -> x, y
                mode='bilinear', align_corners=True)  # B * g, Cg, Hg, Wg

        x_sampled = x_sampled.reshape(B, C, 1, n_sample)

        q = q.reshape(B * self.n_heads, self.n_head_channels, H * W)
        k = self.proj_k(x_sampled).reshape(B * self.n_heads, self.n_head_channels, n_sample)
        v = self.proj_v(x_sampled).reshape(B * self.n_heads, self.n_head_channels, n_sample)

        attn = torch.einsum('b c m, b c n -> b m n', q, k)  # B * h, HW, Ns
        attn = attn.mul(self.scale)

        # Only set when the depth-wise positional branch actually runs; with
        # no_off=True that branch is skipped and nothing must be added below.
        residual_lepe = None
        if self.use_pe and (not self.no_off):
            if self.dwc_pe:
                residual_lepe = self.rpe_table(q.reshape(B, C, H, W)).reshape(B * self.n_heads, self.n_head_channels, H * W)
            elif self.fixed_pe:
                rpe_table = self.rpe_table
                attn_bias = rpe_table[None, ...].expand(B, -1, -1, -1)
                attn = attn + attn_bias.reshape(B * self.n_heads, H * W, n_sample)
            elif self.log_cpb:
                q_grid = self._get_q_grid(H, W, B, dtype, device)
                displacement = (q_grid.reshape(B * self.n_groups, H * W, 2).unsqueeze(2) - pos.reshape(B * self.n_groups, n_sample, 2).unsqueeze(1)).mul(4.0)  # d_y, d_x [-8, +8]
                displacement = torch.sign(displacement) * torch.log2(torch.abs(displacement) + 1.0) / np.log2(8.0)
                attn_bias = self.rpe_table(displacement)  # B * g, H * W, n_sample, h_g
                attn = attn + einops.rearrange(attn_bias, 'b m n h -> (b h) m n', h=self.n_group_heads)
            else:
                rpe_table = self.rpe_table
                rpe_bias = rpe_table[None, ...].expand(B, -1, -1, -1)
                q_grid = self._get_q_grid(H, W, B, dtype, device)
                displacement = (q_grid.reshape(B * self.n_groups, H * W, 2).unsqueeze(2) - pos.reshape(B * self.n_groups, n_sample, 2).unsqueeze(1)).mul(0.5)
                attn_bias = F.grid_sample(
                    input=einops.rearrange(rpe_bias, 'b (g c) h w -> (b g) c h w', c=self.n_group_heads, g=self.n_groups),
                    grid=displacement[..., (1, 0)],
                    mode='bilinear', align_corners=True)  # B * g, h_g, HW, Ns

                attn_bias = attn_bias.reshape(B * self.n_heads, H * W, n_sample)
                attn = attn + attn_bias

        attn = F.softmax(attn, dim=2)
        attn = self.attn_drop(attn)

        out = torch.einsum('b m n, b c n -> b c m', attn, v)

        if residual_lepe is not None:
            out = out + residual_lepe
        out = out.reshape(B, C, H, W)

        y = self.proj_drop(self.proj_out(out))

        return y
================================================
FILE: cv-attention/ECA.py
================================================
import torch, math
from torch import nn
class EfficientChannelAttention(nn.Module):  # Efficient Channel Attention module
    """Channel gate computed by a 1-D conv across the globally pooled channel descriptor."""

    def __init__(self, c, b=1, gamma=2):
        """
        Args:
            c: number of channels; determines the 1-D kernel size.
            b, gamma: constants of the kernel-size rule ``|(log2(c) + b) / gamma|``,
                truncated to an int and bumped to the next odd value if even.
        """
        super(EfficientChannelAttention, self).__init__()
        t = int(abs((math.log(c, 2) + b) / gamma))
        k = t + 1 if t % 2 == 0 else t  # force an odd kernel size

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv1d(1, 1, kernel_size=k, padding=int(k / 2), bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` scaled by the per-channel sigmoid gate."""
        pooled = self.avg_pool(x)                       # (b, c, 1, 1)
        seq = pooled.squeeze(-1).transpose(-1, -2)      # (b, 1, c): channels as a sequence
        mixed = self.conv1(seq)
        gate = mixed.transpose(-1, -2).unsqueeze(-1)    # back to (b, c, 1, 1)
        gate = self.sigmoid(gate)
        return gate * x
if __name__ == '__main__':
    # Smoke test: the output shape should equal the input shape.
    # (named `x` rather than `input` to avoid shadowing the builtin)
    x = torch.randn(50, 512, 7, 7)
    eca = EfficientChannelAttention(c=512)
    output = eca(x)
    print(output.shape)
================================================
FILE: cv-attention/ELA.py
================================================
import torch.nn as nn
class ELA(nn.Module):
    """Axis-wise attention: one shared Conv1d/GroupNorm/Sigmoid stack gates rows and columns."""

    def __init__(self, channels) -> None:
        """
        Args:
            channels: number of channels; the GroupNorm uses 16 groups.
        """
        super().__init__()
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))  # average over width
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))  # average over height
        self.conv1x1 = nn.Sequential(
            nn.Conv1d(channels, channels, 1),
            nn.GroupNorm(16, channels),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` multiplied by the row gate and the column gate."""
        b, c, h, w = x.size()
        rows = self.pool_h(x).reshape((b, c, h))
        row_gate = self.conv1x1(rows).reshape((b, c, h, 1))
        cols = self.pool_w(x).reshape((b, c, w))
        col_gate = self.conv1x1(cols).reshape((b, c, 1, w))
        return x * row_gate * col_gate
================================================
FILE: cv-attention/EMA.py
================================================
import torch
from torch import nn
class EMA(nn.Module):
    """Grouped attention fusing a 1x1 (axis-pooled) branch and a 3x3 branch into one spatial gate."""

    def __init__(self, channels, factor=8):
        """
        Args:
            channels: number of input channels.
            factor: number of channel groups; each group has ``channels // factor`` channels.
        """
        super(EMA, self).__init__()
        self.groups = factor
        assert channels // self.groups > 0
        group_channels = channels // self.groups
        self.softmax = nn.Softmax(-1)
        self.agp = nn.AdaptiveAvgPool2d((1, 1))
        self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
        self.pool_w = nn.AdaptiveAvgPool2d((1, None))
        self.gn = nn.GroupNorm(group_channels, group_channels)
        self.conv1x1 = nn.Conv2d(group_channels, group_channels, kernel_size=1, stride=1, padding=0)
        self.conv3x3 = nn.Conv2d(group_channels, group_channels, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        """Return ``x`` re-weighted per group by a sigmoid spatial map; the shape is unchanged."""
        b, c, h, w = x.size()
        bg = b * self.groups
        group_x = x.reshape(bg, -1, h, w)  # b*g,c//g,h,w

        # 1x1 branch: gates along H and W from one shared conv over the stacked strips
        strip_h = self.pool_h(group_x)
        strip_w = self.pool_w(group_x).permute(0, 1, 3, 2)
        hw = self.conv1x1(torch.cat([strip_h, strip_w], dim=2))
        strip_h, strip_w = torch.split(hw, [h, w], dim=2)
        x1 = self.gn(group_x * strip_h.sigmoid() * strip_w.permute(0, 1, 3, 2).sigmoid())

        # 3x3 branch
        x2 = self.conv3x3(group_x)

        # cross-branch aggregation: channel softmax of one branch weights the other's features
        x11 = self.softmax(self.agp(x1).reshape(bg, -1, 1).permute(0, 2, 1))
        x12 = x2.reshape(bg, c // self.groups, -1)  # b*g, c//g, hw
        x21 = self.softmax(self.agp(x2).reshape(bg, -1, 1).permute(0, 2, 1))
        x22 = x1.reshape(bg, c // self.groups, -1)  # b*g, c//g, hw

        weights = torch.matmul(x11, x12) + torch.matmul(x21, x22)
        weights = weights.reshape(bg, 1, h, w)
        return (group_x * weights.sigmoid()).reshape(b, c, h, w)
================================================
FILE: cv-attention/EffectiveSE.py
================================================
import torch
from torch import nn as nn
from timm.models.layers.create_act import create_act_layer
class EffectiveSEModule(nn.Module):
    """Squeeze-and-excitation with a single 1x1 conv (no bottleneck) and a configurable gate."""

    def __init__(self, channels, add_maxpool=False, gate_layer='hard_sigmoid'):
        """
        Args:
            channels: number of channels (input == output).
            add_maxpool: average the mean descriptor with the spatial max descriptor.
            gate_layer: gate activation, resolved through ``create_act_layer``.
        """
        super(EffectiveSEModule, self).__init__()
        self.add_maxpool = add_maxpool
        self.fc = nn.Conv2d(channels, channels, kernel_size=1, padding=0)
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        """Return ``x`` scaled by the gated channel descriptor."""
        descriptor = x.mean((2, 3), keepdim=True)
        if self.add_maxpool:
            # experimental codepath, may remove or change
            descriptor = 0.5 * descriptor + 0.5 * x.amax((2, 3), keepdim=True)
        descriptor = self.fc(descriptor)
        return x * self.gate(descriptor)
if __name__ == '__main__':
    # Smoke test: the output shape should equal the input shape.
    # (named `x` rather than `input` to avoid shadowing the builtin)
    x = torch.randn(50, 512, 7, 7)
    Ese = EffectiveSEModule(512)
    output = Ese(x)
    print(output.shape)
================================================
FILE: cv-attention/GAM.py
================================================
import torch.nn as nn
import torch
class GAM_Attention(nn.Module):
    """Channel attention (per-position MLP) followed by spatial attention (two 7x7 convs)."""

    def __init__(self, in_channels, rate=4):
        """
        Args:
            in_channels: number of channels (input == output).
            rate: reduction ratio; the hidden width is ``int(in_channels / rate)``.
        """
        super(GAM_Attention, self).__init__()
        hidden = int(in_channels / rate)

        self.channel_attention = nn.Sequential(
            nn.Linear(in_channels, hidden),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, in_channels),
        )

        self.spatial_attention = nn.Sequential(
            nn.Conv2d(in_channels, hidden, kernel_size=7, padding=3),
            nn.BatchNorm2d(hidden),
            nn.ReLU(inplace=True),
            nn.Conv2d(hidden, in_channels, kernel_size=7, padding=3),
            nn.BatchNorm2d(in_channels),
        )

    def forward(self, x):
        """Return ``x`` scaled by the channel gate and then by the spatial gate."""
        b, c, h, w = x.shape
        # channels last, spatial positions flattened: (b, h*w, c)
        tokens = x.permute(0, 2, 3, 1).view(b, -1, c)
        channel_logits = self.channel_attention(tokens).view(b, h, w, c)
        channel_gate = channel_logits.permute(0, 3, 1, 2).sigmoid()

        x = x * channel_gate

        spatial_gate = self.spatial_attention(x).sigmoid()
        return x * spatial_gate
if __name__ == '__main__':
    # Smoke test: the output size should equal the input size.
    x = torch.randn(1, 64, 20, 20)
    b, c, h, w = x.shape
    net = GAM_Attention(in_channels=c)
    y = net(x)
    print(y.size())
================================================
FILE: cv-attention/GC.py
================================================
import torch
from torch import nn as nn
import torch.nn.functional as F
from timm.models.layers.create_act import create_act_layer, get_act_layer
from timm.models.layers import make_divisible
from timm.models.layers.mlp import ConvMlp
from timm.models.layers.norm import LayerNorm2d
class GlobalContext(nn.Module):
    """Global Context block: pool a context vector, then scale and/or add it.

    Args:
        channels: Number of input channels.
        use_attn: If True, pool the context with a softmax spatial attention
            produced by a 1x1 conv; otherwise use the spatial mean.
        fuse_add: If True, add ``mlp_add(context)`` to the input.
        fuse_scale: If True, multiply the input by ``gate(mlp_scale(context))``.
        init_last_zero: Stored on the module; not read anywhere else in this
            class.
        rd_ratio, rd_channels, rd_divisor: Hidden width of the MLPs. When
            ``rd_channels`` is None it is derived with ``make_divisible``.
        act_layer: Activation spec resolved by ``get_act_layer``.
        gate_layer: Activation spec handed to ``create_act_layer``.
    """

    def __init__(self, channels, use_attn=True, fuse_add=False, fuse_scale=True, init_last_zero=False,
                 rd_ratio=1./8, rd_channels=None, rd_divisor=1, act_layer=nn.ReLU, gate_layer='sigmoid'):
        super().__init__()
        act_layer = get_act_layer(act_layer)

        if use_attn:
            self.conv_attn = nn.Conv2d(channels, 1, kernel_size=1, bias=True)
        else:
            self.conv_attn = None

        if rd_channels is None:
            rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)

        self.mlp_add = (
            ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) if fuse_add else None
        )
        self.mlp_scale = (
            ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) if fuse_scale else None
        )

        self.gate = create_act_layer(gate_layer)
        self.init_last_zero = init_last_zero
        self.reset_parameters()

    def reset_parameters(self):
        """Kaiming-init the attention conv and zero the additive MLP's last weight."""
        if self.conv_attn is not None:
            nn.init.kaiming_normal_(self.conv_attn.weight, mode='fan_in', nonlinearity='relu')
        if self.mlp_add is not None:
            nn.init.zeros_(self.mlp_add.fc2.weight)

    def forward(self, x):
        B, C, H, W = x.shape

        if self.conv_attn is None:
            context = x.mean(dim=(2, 3), keepdim=True)
        else:
            scores = self.conv_attn(x).reshape(B, 1, H * W)  # (B, 1, H * W)
            scores = F.softmax(scores, dim=-1).unsqueeze(3)  # (B, 1, H * W, 1)
            context = x.reshape(B, C, H * W).unsqueeze(1) @ scores
            context = context.view(B, C, 1, 1)

        if self.mlp_scale is not None:
            x = x * self.gate(self.mlp_scale(context))
        if self.mlp_add is not None:
            x = x + self.mlp_add(context)
        return x
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(50, 512, 7, 7)
    gc = GlobalContext(512)
    print(gc(feats).shape)
================================================
FILE: cv-attention/GE.py
================================================
import math, torch
from torch import nn as nn
import torch.nn.functional as F
from timm.models.layers.create_act import create_act_layer, get_act_layer
from timm.models.layers.create_conv2d import create_conv2d
from timm.models.layers import make_divisible
from timm.models.layers.mlp import ConvMlp
class GatherExcite(nn.Module):
    """Gather-Excite attention: gather spatial context, then gate the input.

    Args:
        channels: Number of input channels.
        feat_size: Kernel size of the depthwise gather conv; required when
            ``extra_params`` is True and ``extent`` is 0.
        extra_params: If True, gather with learned depthwise convs instead
            of pooling.
        extent: 0 gathers globally. Otherwise it sets the pooling window
            (``2 * extent - 1``) and stride (``extent``), or on the learned
            path the number of stride-2 convs (``log2(extent)``).
        use_mlp: If True, pass the gathered map through a ``ConvMlp``.
        rd_ratio, rd_channels, rd_divisor: Hidden width of the MLP; when
            ``rd_channels`` is falsy it is derived with ``make_divisible``.
        add_maxpool: Blend max pooling 50/50 into the pooling-based gather.
        act_layer: Activation spec resolved by ``get_act_layer``.
        norm_layer: Only its truthiness is used here: when truthy, a
            ``BatchNorm2d`` follows each gather conv.
        gate_layer: Activation spec handed to ``create_act_layer``.
    """

    def __init__(
            self, channels, feat_size=None, extra_params=False, extent=0, use_mlp=True,
            rd_ratio=1./16, rd_channels=None, rd_divisor=1, add_maxpool=False,
            act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, gate_layer='sigmoid'):
        super().__init__()
        self.add_maxpool = add_maxpool
        act_layer = get_act_layer(act_layer)
        self.extent = extent

        if not extra_params:
            # Parameter-free gather: pooling configured by kernel/stride.
            self.gather = None
            if self.extent == 0:
                self.gk = 0
                self.gs = 0
            else:
                assert extent % 2 == 0
                self.gk = self.extent * 2 - 1
                self.gs = self.extent
        else:
            self.gather = nn.Sequential()
            if extent == 0:
                assert feat_size is not None, 'spatial feature size must be specified for global extent w/ params'
                self.gather.add_module(
                    'conv1', create_conv2d(channels, channels, kernel_size=feat_size, stride=1, depthwise=True))
                if norm_layer:
                    self.gather.add_module('norm1', nn.BatchNorm2d(channels))
            else:
                assert extent % 2 == 0
                num_conv = int(math.log2(extent))
                for idx in range(1, num_conv + 1):
                    self.gather.add_module(
                        f'conv{idx}',
                        create_conv2d(channels, channels, kernel_size=3, stride=2, depthwise=True))
                    if norm_layer:
                        self.gather.add_module(f'norm{idx}', nn.BatchNorm2d(channels))
                    # no activation after the last conv
                    if idx != num_conv:
                        self.gather.add_module(f'act{idx}', act_layer(inplace=True))

        if not rd_channels:
            rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.)
        self.mlp = ConvMlp(channels, rd_channels, act_layer=act_layer) if use_mlp else nn.Identity()
        self.gate = create_act_layer(gate_layer)

    def forward(self, x):
        size = x.shape[-2:]
        if self.gather is not None:
            x_ge = self.gather(x)
        elif self.extent == 0:
            # global extent
            x_ge = x.mean(dim=(2, 3), keepdims=True)
            if self.add_maxpool:
                # experimental codepath, may remove or change
                x_ge = 0.5 * x_ge + 0.5 * x.amax((2, 3), keepdim=True)
        else:
            pad = self.gk // 2
            x_ge = F.avg_pool2d(
                x, kernel_size=self.gk, stride=self.gs, padding=pad, count_include_pad=False)
            if self.add_maxpool:
                # experimental codepath, may remove or change
                x_ge = 0.5 * x_ge + 0.5 * F.max_pool2d(x, kernel_size=self.gk, stride=self.gs, padding=pad)

        x_ge = self.mlp(x_ge)
        # Resize back to the input's spatial size unless the map is 1x1.
        if not (x_ge.shape[-1] == 1 and x_ge.shape[-2] == 1):
            x_ge = F.interpolate(x_ge, size=size)
        return x * self.gate(x_ge)
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Locals renamed so ``input`` no longer shadows the builtin.)
    feats = torch.randn(50, 512, 7, 7)
    ge = GatherExcite(512)
    print(ge(feats).shape)
================================================
FILE: cv-attention/LSKA.py
================================================
import torch.nn as nn
class LSKA(nn.Module):
    """Large-Separable-Kernel-Attention.

    https://github.com/StevenLauHKHK/Large-Separable-Kernel-Attention/tree/main

    A large depthwise kernel is approximated by four depthwise 1-D convs
    (horizontal and vertical local convs, then horizontal and vertical
    dilated convs) followed by a 1x1 conv; the result gates the input.

    Args:
        dim: Number of channels (every conv is depthwise or 1x1 over ``dim``).
        k_size: Nominal kernel size; one of 7, 11, 23, 35, 41, 53.

    Raises:
        ValueError: If ``k_size`` is not a supported value. Without this
            check the module would be built with no convs and only fail
            later, inside ``forward``.
    """

    # k_size -> (local kernel length, dilated kernel length, dilation)
    _KERNEL_SPECS = {
        7: (3, 3, 2),
        11: (3, 5, 2),
        23: (5, 7, 3),
        35: (5, 11, 3),
        41: (5, 13, 3),
        53: (5, 17, 3),
    }

    def __init__(self, dim, k_size=7):
        super().__init__()
        if k_size not in self._KERNEL_SPECS:
            raise ValueError(
                f'unsupported k_size {k_size!r}; expected one of {sorted(self._KERNEL_SPECS)}')
        self.k_size = k_size

        local_k, dilated_k, dilation = self._KERNEL_SPECS[k_size]
        # "same" padding for an odd 1-D kernel with the given dilation
        local_pad = (local_k - 1) // 2
        dilated_pad = dilation * (dilated_k - 1) // 2

        self.conv0h = nn.Conv2d(dim, dim, kernel_size=(1, local_k), stride=(1, 1),
                                padding=(0, local_pad), groups=dim)
        self.conv0v = nn.Conv2d(dim, dim, kernel_size=(local_k, 1), stride=(1, 1),
                                padding=(local_pad, 0), groups=dim)
        self.conv_spatial_h = nn.Conv2d(dim, dim, kernel_size=(1, dilated_k), stride=(1, 1),
                                        padding=(0, dilated_pad), groups=dim, dilation=dilation)
        self.conv_spatial_v = nn.Conv2d(dim, dim, kernel_size=(dilated_k, 1), stride=(1, 1),
                                        padding=(dilated_pad, 0), groups=dim, dilation=dilation)
        self.conv1 = nn.Conv2d(dim, dim, 1)

    def forward(self, x):
        """Return ``x`` multiplied element-wise by its attention map."""
        attn = self.conv0h(x)
        attn = self.conv0v(attn)
        attn = self.conv_spatial_h(attn)
        attn = self.conv_spatial_v(attn)
        attn = self.conv1(attn)
        # No op above writes to x in place, so x can be used directly.
        return x * attn
================================================
FILE: cv-attention/LSKBlock.py
================================================
import torch
import torch.nn as nn
class LSKblock(nn.Module):
    """Large selective kernel block.

    Two depthwise branches (a 5x5 conv, then a dilated 7x7 conv applied to
    its output) are each reduced to ``dim // 2`` channels, mixed with two
    spatial selection maps, projected back to ``dim`` channels and used to
    gate the input.

    Args:
        dim: Number of input/output channels.
    """

    def __init__(self, dim):
        super().__init__()
        half = dim // 2
        self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3)
        self.conv1 = nn.Conv2d(dim, half, 1)
        self.conv2 = nn.Conv2d(dim, half, 1)
        self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3)
        self.conv = nn.Conv2d(half, dim, 1)

    def forward(self, x):
        small = self.conv0(x)
        large = self.conv_spatial(small)
        small = self.conv1(small)
        large = self.conv2(large)

        stacked = torch.cat([small, large], dim=1)
        # channel-wise mean and max descriptors drive the selection maps
        mean_map = torch.mean(stacked, dim=1, keepdim=True)
        max_map, _ = torch.max(stacked, dim=1, keepdim=True)
        select = self.conv_squeeze(torch.cat([mean_map, max_map], dim=1)).sigmoid()

        mixed = small * select[:, 0, :, :].unsqueeze(1) + large * select[:, 1, :, :].unsqueeze(1)
        return x * self.conv(mixed)
================================================
FILE: cv-attention/MHSA.py
================================================
import torch
import torch.nn as nn
class MHSA(nn.Module):
    """Multi-head self-attention over the spatial positions of a feature map.

    Args:
        n_dims: Number of channels; split evenly across ``heads``.
        width, height: Sizes of the learned relative position parameters
            (only used when ``pos_emb`` is True).
        heads: Number of attention heads.
        pos_emb: If True, add a learned content-position term to the logits.
    """

    def __init__(self, n_dims, width=14, height=14, heads=4, pos_emb=False):
        super().__init__()
        self.heads = heads
        self.query = nn.Conv2d(n_dims, n_dims, kernel_size=1)
        self.key = nn.Conv2d(n_dims, n_dims, kernel_size=1)
        self.value = nn.Conv2d(n_dims, n_dims, kernel_size=1)
        self.pos = pos_emb
        if self.pos:
            head_dim = n_dims // heads
            self.rel_h_weight = nn.Parameter(
                torch.randn([1, heads, head_dim, 1, int(height)]), requires_grad=True)
            self.rel_w_weight = nn.Parameter(
                torch.randn([1, heads, head_dim, int(width), 1]), requires_grad=True)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        n_batch, C, width, height = x.size()
        head_dim = C // self.heads
        per_head = (n_batch, self.heads, head_dim, -1)

        q = self.query(x).view(*per_head)
        k = self.key(x).view(*per_head)
        v = self.value(x).view(*per_head)

        # content-content logits, one row per query position
        energy = torch.matmul(q.permute(0, 1, 3, 2), k)
        if self.pos:
            rel = (self.rel_h_weight + self.rel_w_weight).view(1, self.heads, head_dim, -1)
            content_position = torch.matmul(rel.permute(0, 1, 3, 2), q)
            if energy.shape != content_position.shape:
                # trim the position rows to the number of query positions
                content_position = content_position[:, :, :energy.size(2), ]
            assert (energy.shape == content_position.shape)
            energy = energy + content_position

        attention = self.softmax(energy)
        out = torch.matmul(v, attention.permute(0, 1, 3, 2))
        return out.view(n_batch, C, width, height)
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(50, 512, 7, 7)
    mhsa = MHSA(n_dims=512)
    print(mhsa(feats).shape)
================================================
FILE: cv-attention/MLCA.py
================================================
import math, torch
from torch import nn
import torch.nn.functional as F
class MLCA(nn.Module):
    """Channel attention mixing a local (pooled grid) and a global branch.

    Args:
        in_size: Channel count used to derive the 1-D conv kernel size.
        local_size: Side length of the pooled grid for the local branch.
        gamma, b: Constants of the ECA-style kernel-size rule.
        local_weight: Blend factor; the local map gets ``local_weight`` and
            the global map gets ``1 - local_weight``.
    """

    def __init__(self, in_size, local_size=5, gamma = 2, b = 1,local_weight=0.5):
        super().__init__()

        # ECA-style rule for the 1-D conv kernel size
        self.local_size = local_size
        self.gamma = gamma
        self.b = b
        t = int(abs(math.log(in_size, 2) + self.b) / self.gamma)  # eca gamma=2
        k = t if t % 2 else t + 1  # force an odd kernel

        pad = (k - 1) // 2
        self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=pad, bias=False)
        self.conv_local = nn.Conv1d(1, 1, kernel_size=k, padding=pad, bias=False)

        self.local_weight = local_weight

        self.local_arv_pool = nn.AdaptiveAvgPool2d(local_size)
        self.global_arv_pool = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        b, c, m, n = x.shape
        grid = self.local_size

        pooled_local = self.local_arv_pool(x)
        pooled_global = self.global_arv_pool(pooled_local)
        c_local = pooled_local.shape[1]

        # (b,c,local_size,local_size) -> (b,c,local_size*local_size) -> (b,local_size*local_size,c) -> (b,1,local_size*local_size*c)
        seq_local = pooled_local.view(b, c_local, -1).transpose(-1, -2).reshape(b, 1, -1)
        # (b,c,1,1) -> (b,c,1) -> (b,1,c)
        seq_global = pooled_global.view(b, c, -1).transpose(-1, -2)

        y_local = self.conv_local(seq_local)
        y_global = self.conv(seq_global)

        # (b,1,local_size*local_size*c) -> (b,local_size*local_size,c) -> (b,c,local_size*local_size) -> (b,c,local_size,local_size)
        map_local = y_local.reshape(b, grid * grid, c).transpose(-1, -2).view(b, c, grid, grid)
        # (b,1,c) -> (b,c,1) -> (b,c,1,1)
        map_global = y_global.transpose(-1, -2).unsqueeze(-1)

        # "un-pooling": bring both maps to the grid, blend, then resize to the input
        att_local = map_local.sigmoid()
        att_global = F.adaptive_avg_pool2d(map_global.sigmoid(), [grid, grid])
        blended = att_global * (1 - self.local_weight) + (att_local * self.local_weight)
        att_all = F.adaptive_avg_pool2d(blended, [m, n])

        return x * att_all
if __name__ == '__main__':
    # Smoke test: print the output size for a random batch.
    mlca = MLCA(in_size=256)
    sample = torch.randn((2, 256, 16, 16))
    print(mlca(sample).size())
================================================
FILE: cv-attention/MobileViTAttention.py
================================================
from torch import nn
import torch
from einops import rearrange
class PreNorm(nn.Module):
    """Apply LayerNorm over the last dimension, then call ``fn``.

    Args:
        dim: Size of the normalized (last) dimension.
        fn: Callable applied to the normalized tensor.
    """

    def __init__(self, dim, fn):
        super().__init__()
        self.ln = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Return ``fn(LayerNorm(x), **kwargs)``."""
        normed = self.ln(x)
        return self.fn(normed, **kwargs)
class FeedForward(nn.Module):
    """Two-layer MLP: Linear -> SiLU -> Dropout -> Linear -> Dropout.

    Args:
        dim: Input and output feature size.
        mlp_dim: Hidden feature size.
        dropout: Dropout probability used after both linear layers.
    """

    def __init__(self, dim, mlp_dim, dropout):
        super().__init__()
        stages = [
            nn.Linear(dim, mlp_dim),
            nn.SiLU(),
            nn.Dropout(dropout),
            nn.Linear(mlp_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the MLP."""
        return self.net(x)
class Attention(nn.Module):
    """Multi-head scaled dot-product attention over the token axis.

    Args:
        dim: Input and output feature size.
        heads: Number of attention heads.
        head_dim: Feature size per head.
        dropout: Dropout probability applied after the output projection.
    """

    def __init__(self, dim, heads, head_dim, dropout):
        super().__init__()
        inner_dim = heads * head_dim
        # The output projection is skipped only for a single head of width dim.
        needs_projection = heads != 1 or head_dim != dim

        self.heads = heads
        self.scale = head_dim ** -0.5

        self.attend = nn.Softmax(dim=-1)
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False)

        if needs_projection:
            self.to_out = nn.Sequential(
                nn.Linear(inner_dim, dim),
                nn.Dropout(dropout)
            )
        else:
            self.to_out = nn.Identity()

    def forward(self, x):
        """Attend within each patch group; the pattern expects a 4-D ``(b, p, n, dim)`` input."""
        q, k, v = (
            rearrange(part, 'b p n (h d) -> b p h n d', h=self.heads)
            for part in self.to_qkv(x).chunk(3, dim=-1)
        )
        scores = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        weights = self.attend(scores)
        merged = rearrange(torch.matmul(weights, v), 'b p h n d -> b p n (h d)')
        return self.to_out(merged)
class Transformer(nn.Module):
    """Stack of pre-norm attention + feed-forward layers with residual adds.

    Args:
        dim: Feature size of the tokens.
        depth: Number of (attention, feed-forward) layer pairs.
        heads: Number of attention heads.
        head_dim: Feature size per head.
        mlp_dim: Hidden size of the feed-forward layers.
        dropout: Dropout probability used in both sub-layers.
    """

    def __init__(self, dim, depth, heads, head_dim, mlp_dim, dropout=0.):
        super().__init__()
        self.layers = nn.ModuleList([
            nn.ModuleList([
                PreNorm(dim, Attention(dim, heads, head_dim, dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout))
            ])
            for _ in range(depth)
        ])

    def forward(self, x):
        """Apply every layer pair in order, each with a residual connection."""
        hidden = x
        for attention, feed_forward in self.layers:
            hidden = hidden + attention(hidden)
            hidden = hidden + feed_forward(hidden)
        return hidden
class MobileViTAttention(nn.Module):
    """MobileViT-style block: local convs, patch-wise transformer, conv fusion.

    Args:
        in_channel: Number of input/output channels.
        dim: Feature size used inside the transformer.
        kernel_size: Kernel size of the first and last convs.
        patch_size: Side length of the square patches. The rearrange
            patterns split H and W by this factor, so both are expected to
            be multiples of it.
    """

    def __init__(self, in_channel=3, dim=512, kernel_size=3, patch_size=7):
        super().__init__()
        self.ph, self.pw = patch_size, patch_size
        self.conv1 = nn.Conv2d(in_channel, in_channel, kernel_size=kernel_size, padding=kernel_size // 2)
        self.conv2 = nn.Conv2d(in_channel, dim, kernel_size=1)

        self.trans = Transformer(dim=dim, depth=3, heads=8, head_dim=64, mlp_dim=1024)

        self.conv3 = nn.Conv2d(dim, in_channel, kernel_size=1)
        self.conv4 = nn.Conv2d(2 * in_channel, in_channel, kernel_size=kernel_size, padding=kernel_size // 2)

    def forward(self, x):
        """Return a tensor with the same shape as ``x``."""
        # The input is never written in place, so no defensive copy is needed.

        ## Local Representation
        y = self.conv2(self.conv1(x))  # bs,dim,h,w

        ## Global Representation
        _, _, h, w = y.shape
        y = rearrange(y, 'bs dim (nh ph) (nw pw) -> bs (ph pw) (nh nw) dim',
                      ph=self.ph, pw=self.pw)  # bs,ph*pw,nh*nw,dim
        y = self.trans(y)
        y = rearrange(y, 'bs (ph pw) (nh nw) dim -> bs dim (nh ph) (nw pw)',
                      ph=self.ph, pw=self.pw, nh=h // self.ph, nw=w // self.pw)  # bs,dim,h,w

        ## Fusion
        y = self.conv3(y)  # bs,c,h,w
        y = torch.cat([x, y], 1)  # bs,2*c,h,w
        y = self.conv4(y)  # bs,c,h,w

        return y
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    m = MobileViTAttention(in_channel=512)
    feats = torch.randn(1, 512, 49, 49)
    print(m(feats).shape)
================================================
FILE: cv-attention/ParNetAttention.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
class ParNetAttention(nn.Module):
    """ParNet block: 1x1 conv, 3x3 conv and an SSE gate summed, then SiLU.

    Args:
        channel: Number of input/output channels.
    """

    def __init__(self, channel=512):
        super().__init__()
        # Skip-squeeze-excitation: a pooled, 1x1-conv'd, sigmoid channel gate.
        self.sse = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channel, channel, kernel_size=1),
            nn.Sigmoid()
        )

        self.conv1x1 = nn.Sequential(
            nn.Conv2d(channel, channel, kernel_size=1),
            nn.BatchNorm2d(channel)
        )
        self.conv3x3 = nn.Sequential(
            nn.Conv2d(channel, channel, kernel_size=3, padding=1),
            nn.BatchNorm2d(channel)
        )
        self.silu = nn.SiLU()

    def forward(self, x):
        """Return ``SiLU(conv1x1(x) + conv3x3(x) + sse(x) * x)``."""
        x1 = self.conv1x1(x)
        x2 = self.conv3x3(x)
        x3 = self.sse(x) * x
        return self.silu(x1 + x2 + x3)
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(50, 512, 7, 7)
    pna = ParNetAttention(channel=512)
    print(pna(feats).shape)
================================================
FILE: cv-attention/PolarizedSelfAttention.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
class ParallelPolarizedSelfAttention(nn.Module):
    """Polarized self-attention with channel and spatial branches in parallel.

    Both branches are computed from the same input and their gated outputs
    are summed.

    Args:
        channel: Number of input/output channels.
    """

    def __init__(self, channel=512):
        super().__init__()
        half = channel // 2
        self.ch_wv = nn.Conv2d(channel, half, kernel_size=(1, 1))
        self.ch_wq = nn.Conv2d(channel, 1, kernel_size=(1, 1))
        self.softmax_channel = nn.Softmax(1)
        self.softmax_spatial = nn.Softmax(-1)
        self.ch_wz = nn.Conv2d(half, channel, kernel_size=(1, 1))
        self.ln = nn.LayerNorm(channel)
        self.sigmoid = nn.Sigmoid()
        self.sp_wv = nn.Conv2d(channel, half, kernel_size=(1, 1))
        self.sp_wq = nn.Conv2d(channel, half, kernel_size=(1, 1))
        self.agp = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        b, c, h, w = x.size()
        half = c // 2

        # Channel-only Self-Attention
        ch_v = self.ch_wv(x).reshape(b, half, -1)  # bs,c//2,h*w
        ch_q = self.ch_wq(x).reshape(b, -1, 1)  # bs,h*w,1
        ch_q = self.softmax_channel(ch_q)  # softmax over the h*w positions
        ch_z = torch.matmul(ch_v, ch_q).unsqueeze(-1)  # bs,c//2,1,1
        ch_z = self.ch_wz(ch_z).reshape(b, c, 1).permute(0, 2, 1)  # bs,1,c
        channel_weight = self.sigmoid(self.ln(ch_z)).permute(0, 2, 1).reshape(b, c, 1, 1)  # bs,c,1,1
        channel_out = channel_weight * x

        # Spatial-only Self-Attention
        sp_v = self.sp_wv(x)  # bs,c//2,h,w
        sp_q = self.agp(self.sp_wq(x))  # bs,c//2,1,1
        sp_v = sp_v.reshape(b, half, -1)  # bs,c//2,h*w
        sp_q = sp_q.permute(0, 2, 3, 1).reshape(b, 1, half)  # bs,1,c//2
        sp_q = self.softmax_spatial(sp_q)
        sp_z = torch.matmul(sp_q, sp_v)  # bs,1,h*w
        spatial_weight = self.sigmoid(sp_z.reshape(b, 1, h, w))  # bs,1,h,w
        spatial_out = spatial_weight * x

        return spatial_out + channel_out
if __name__ == '__main__':
    # Smoke test: print the output shape for a random input.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(1, 512, 7, 7)
    psa = ParallelPolarizedSelfAttention(channel=512)
    print(psa(feats).shape)
================================================
FILE: cv-attention/S2Attention.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
def spatial_shift1(x):
    """Shift four channel quarters of ``x`` by one step each, in place.

    ``x`` is unpacked as ``(b, w, h, c)``. The first two quarters move
    forward/backward along dim 1 and the last two forward/backward along
    dim 2; the vacated border keeps its previous values.

    Returns:
        The same tensor object ``x``, modified in place.
    """
    b, w, h, c = x.size()
    # Each source slice overlaps its destination in memory, and an in-place
    # copy between overlapping regions is not well defined, so snapshot the
    # source first.
    x[:, 1:, :, :c // 4] = x[:, :w - 1, :, :c // 4].clone()
    x[:, :w - 1, :, c // 4:c // 2] = x[:, 1:, :, c // 4:c // 2].clone()
    x[:, :, 1:, c // 2:c * 3 // 4] = x[:, :, :h - 1, c // 2:c * 3 // 4].clone()
    x[:, :, :h - 1, 3 * c // 4:] = x[:, :, 1:, 3 * c // 4:].clone()
    return x
def spatial_shift2(x):
    """Shift four channel quarters of ``x`` by one step each, in place.

    ``x`` is unpacked as ``(b, w, h, c)``. The first two quarters move
    forward/backward along dim 2 and the last two forward/backward along
    dim 1; the vacated border keeps its previous values.

    Returns:
        The same tensor object ``x``, modified in place.
    """
    b, w, h, c = x.size()
    # Each source slice overlaps its destination in memory, and an in-place
    # copy between overlapping regions is not well defined, so snapshot the
    # source first.
    x[:, :, 1:, :c // 4] = x[:, :, :h - 1, :c // 4].clone()
    x[:, :, :h - 1, c // 4:c // 2] = x[:, :, 1:, c // 4:c // 2].clone()
    x[:, 1:, :, c // 2:c * 3 // 4] = x[:, :w - 1, :, c // 2:c * 3 // 4].clone()
    x[:, :w - 1, :, 3 * c // 4:] = x[:, 1:, :, 3 * c // 4:].clone()
    return x
class SplitAttention(nn.Module):
    """Fuse ``k`` stacked feature maps with per-channel softmax weights.

    Args:
        channel: Feature size of the last dimension.
        k: Number of stacked maps being fused.
    """

    def __init__(self, channel=512, k=3):
        super().__init__()
        self.channel = channel
        self.k = k
        self.mlp1 = nn.Linear(channel, channel, bias=False)
        self.gelu = nn.GELU()
        self.mlp2 = nn.Linear(channel, channel * k, bias=False)
        self.softmax = nn.Softmax(1)

    def forward(self, x_all):
        """Fuse a ``(b, k, h, w, c)`` stack into a ``(b, h, w, c)`` map."""
        b, k, h, w, c = x_all.shape
        tokens = x_all.reshape(b, k, -1, c)  # bs,k,n,c
        # Global descriptor: sum over the k maps, then over all positions.
        summary = tokens.sum(1).sum(1)  # bs,c
        logits = self.mlp2(self.gelu(self.mlp1(summary)))  # bs,kc
        logits = logits.reshape(b, self.k, c)  # bs,k,c
        weights = self.softmax(logits).unsqueeze(-2)  # bs,k,1,c
        fused = (weights * tokens).sum(1)  # bs,n,c
        return fused.reshape(b, h, w, c)
class S2Attention(nn.Module):
    """S2 attention: expand, spatially shift two thirds, fuse by split-attention.

    Args:
        channels: Number of input/output channels.
    """

    def __init__(self, channels=512):
        super().__init__()
        self.mlp1 = nn.Linear(channels, channels * 3)
        self.mlp2 = nn.Linear(channels, channels)
        # Size the fusion module to `channels`; relying on its default (512)
        # breaks every other channel count.
        self.split_attention = SplitAttention(channels)

    def forward(self, x):
        """Map a ``(b, c, w, h)`` input to an output of the same shape."""
        b, c, w, h = x.size()
        x = x.permute(0, 2, 3, 1)  # channels last for the linear layers
        x = self.mlp1(x)  # expand to 3*c features
        x1 = spatial_shift1(x[:, :, :, :c])
        x2 = spatial_shift2(x[:, :, :, c:c * 2])
        x3 = x[:, :, :, c * 2:]  # third group is left unshifted
        x_all = torch.stack([x1, x2, x3], 1)
        a = self.split_attention(x_all)
        x = self.mlp2(a)
        x = x.permute(0, 3, 1, 2)  # back to channels first
        return x
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(50, 512, 7, 7)
    s2att = S2Attention(channels=512)
    print(s2att(feats).shape)
================================================
FILE: cv-attention/SE.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
class SEAttention(nn.Module):
    """Squeeze-and-Excitation channel attention.

    Args:
        channel: Number of input channels.
        reduction: Reduction ratio of the bottleneck in the gating MLP.
    """

    def __init__(self, channel=512,reduction=16):
        super().__init__()
        squeezed = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, squeezed, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(squeezed, channel, bias=False),
            nn.Sigmoid()
        )

    def init_weights(self):
        """Re-initialize Conv2d, BatchNorm2d and Linear submodules in place.

        Not called by ``__init__``; callers opt in explicitly.
        """
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                init.constant_(module.weight, 1)
                init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Conv2d):
                init.kaiming_normal_(module.weight, mode='fan_out')
            elif isinstance(module, nn.Linear):
                init.normal_(module.weight, std=0.001)
            else:
                continue
            if module.bias is not None:
                init.constant_(module.bias, 0)

    def forward(self, x):
        """Scale each channel of ``x`` by its learned gate."""
        b, c, _, _ = x.size()
        descriptor = self.avg_pool(x).view(b, c)
        gate = self.fc(descriptor).view(b, c, 1, 1)
        return x * gate.expand_as(x)
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(50, 512, 7, 7)
    se = SEAttention(channel=512, reduction=8)
    print(se(feats).shape)
================================================
FILE: cv-attention/SGE.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
class SpatialGroupEnhance(nn.Module):
    """Spatial Group-wise Enhance attention.

    Channels are split into ``groups``; each group gets a spatial gate built
    from the similarity between local features and the group's pooled
    descriptor, normalized over positions and then scaled and shifted by
    per-group parameters.

    Args:
        groups: Number of channel groups; the channel count must be
            divisible by it.
    """

    def __init__(self, groups=8):
        super().__init__()
        self.groups = groups
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.weight = nn.Parameter(torch.zeros(1, groups, 1, 1))
        self.bias = nn.Parameter(torch.zeros(1, groups, 1, 1))
        self.sig = nn.Sigmoid()
        self.init_weights()

    def init_weights(self):
        """Re-initialize any Conv2d, BatchNorm2d or Linear submodules in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        """Return the group-enhanced tensor, same shape as ``x``."""
        b, c, h, w = x.shape
        # reshape (not view) so channels_last / non-contiguous inputs work
        x = x.reshape(b * self.groups, -1, h, w)  # bs*g,dim//g,h,w
        xn = x * self.avg_pool(x)  # bs*g,dim//g,h,w
        xn = xn.sum(dim=1, keepdim=True)  # bs*g,1,h,w
        t = xn.reshape(b * self.groups, -1)  # bs*g,h*w

        # normalize the similarity map over spatial positions
        t = t - t.mean(dim=1, keepdim=True)  # bs*g,h*w
        std = t.std(dim=1, keepdim=True) + 1e-5
        t = t / std  # bs*g,h*w
        t = t.reshape(b, self.groups, h, w)  # bs,g,h,w

        t = t * self.weight + self.bias  # bs,g,h,w
        t = t.reshape(b * self.groups, 1, h, w)  # bs*g,1,h,w
        x = x * self.sig(t)
        return x.reshape(b, c, h, w)
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(50, 512, 7, 7)
    sge = SpatialGroupEnhance(groups=8)
    print(sge(feats).shape)
================================================
FILE: cv-attention/SK.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
from collections import OrderedDict
class SKAttention(nn.Module):
    """Selective Kernel attention: softmax-weighted sum of multi-kernel branches.

    Args:
        channel: Number of input/output channels.
        kernels: Kernel sizes, one conv branch per entry. The default is an
            immutable tuple so it cannot be shared and mutated across
            instances; any sequence is accepted.
        reduction: Reduction ratio for the bottleneck width.
        group: ``groups`` argument of the branch convolutions.
        L: Lower bound on the bottleneck width.
    """

    def __init__(self, channel=512, kernels=(1, 3, 5, 7), reduction=16, group=1, L=32):
        super().__init__()
        self.d = max(L, channel // reduction)
        self.convs = nn.ModuleList([])
        for k in kernels:
            self.convs.append(
                nn.Sequential(OrderedDict([
                    ('conv', nn.Conv2d(channel, channel, kernel_size=k, padding=k // 2, groups=group)),
                    ('bn', nn.BatchNorm2d(channel)),
                    ('relu', nn.ReLU())
                ]))
            )
        self.fc = nn.Linear(channel, self.d)
        self.fcs = nn.ModuleList([])
        for _ in range(len(kernels)):
            self.fcs.append(nn.Linear(self.d, channel))
        # softmax across the branch axis (dim 0 of the stacked weights)
        self.softmax = nn.Softmax(dim=0)

    def forward(self, x):
        """Return the attention-weighted fusion of all branches, same shape as ``x``."""
        bs, c, _, _ = x.size()
        conv_outs = []
        ### split
        for conv in self.convs:
            conv_outs.append(conv(x))
        feats = torch.stack(conv_outs, 0)  # k,bs,channel,h,w

        ### fuse
        U = sum(conv_outs)  # bs,c,h,w

        ### reduction channel
        S = U.mean(-1).mean(-1)  # bs,c
        Z = self.fc(S)  # bs,d

        ### calculate attention weight
        weights = []
        for fc in self.fcs:
            weight = fc(Z)
            weights.append(weight.view(bs, c, 1, 1))  # bs,channel,1,1
        attention_weights = torch.stack(weights, 0)  # k,bs,channel,1,1
        attention_weights = self.softmax(attention_weights)  # k,bs,channel,1,1

        ### fuse
        V = (attention_weights * feats).sum(0)
        return V
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Locals renamed: ``input`` shadowed the builtin, ``se`` was a leftover name.)
    feats = torch.randn(50, 512, 7, 7)
    sk = SKAttention(channel=512, reduction=8)
    print(sk(feats).shape)
================================================
FILE: cv-attention/SequentialSelfAttention.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
class SequentialPolarizedSelfAttention(nn.Module):
    """Polarized self-attention with the channel branch feeding the spatial one.

    The channel gate is applied first; the spatial gate is then computed
    from, and applied to, the channel-gated features.

    Args:
        channel: Number of input/output channels.
    """

    def __init__(self, channel=512):
        super().__init__()
        half = channel // 2
        self.ch_wv = nn.Conv2d(channel, half, kernel_size=(1, 1))
        self.ch_wq = nn.Conv2d(channel, 1, kernel_size=(1, 1))
        self.softmax_channel = nn.Softmax(1)
        self.softmax_spatial = nn.Softmax(-1)
        self.ch_wz = nn.Conv2d(half, channel, kernel_size=(1, 1))
        self.ln = nn.LayerNorm(channel)
        self.sigmoid = nn.Sigmoid()
        self.sp_wv = nn.Conv2d(channel, half, kernel_size=(1, 1))
        self.sp_wq = nn.Conv2d(channel, half, kernel_size=(1, 1))
        self.agp = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        b, c, h, w = x.size()
        half = c // 2

        # Channel-only Self-Attention
        ch_v = self.ch_wv(x).reshape(b, half, -1)  # bs,c//2,h*w
        ch_q = self.ch_wq(x).reshape(b, -1, 1)  # bs,h*w,1
        ch_q = self.softmax_channel(ch_q)  # softmax over the h*w positions
        ch_z = torch.matmul(ch_v, ch_q).unsqueeze(-1)  # bs,c//2,1,1
        ch_z = self.ch_wz(ch_z).reshape(b, c, 1).permute(0, 2, 1)  # bs,1,c
        channel_weight = self.sigmoid(self.ln(ch_z)).permute(0, 2, 1).reshape(b, c, 1, 1)  # bs,c,1,1
        channel_out = channel_weight * x

        # Spatial-only Self-Attention (on the channel-gated features)
        sp_v = self.sp_wv(channel_out)  # bs,c//2,h,w
        sp_q = self.agp(self.sp_wq(channel_out))  # bs,c//2,1,1
        sp_v = sp_v.reshape(b, half, -1)  # bs,c//2,h*w
        sp_q = sp_q.permute(0, 2, 3, 1).reshape(b, 1, half)  # bs,1,c//2
        sp_q = self.softmax_spatial(sp_q)
        sp_z = torch.matmul(sp_q, sp_v)  # bs,1,h*w
        spatial_weight = self.sigmoid(sp_z.reshape(b, 1, h, w))  # bs,1,h,w
        return spatial_weight * channel_out
if __name__ == '__main__':
    # Smoke test: print the output shape for a random input.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(1, 512, 7, 7)
    psa = SequentialPolarizedSelfAttention(channel=512)
    print(psa(feats).shape)
================================================
FILE: cv-attention/ShuffleAttention.py
================================================
import numpy as np
import torch
from torch import nn
from torch.nn import init
from torch.nn.parameter import Parameter
class ShuffleAttention(nn.Module):
    """Shuffle attention: grouped channel/spatial gating plus a channel shuffle.

    Args:
        channel: Number of input channels; must be divisible by ``2 * G``.
        reduction: Accepted for interface compatibility; not used here.
        G: Number of groups the channels are split into.
    """

    def __init__(self, channel=512, reduction=16, G=8):
        super().__init__()
        self.G = G
        self.channel = channel
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.gn = nn.GroupNorm(channel // (2 * G), channel // (2 * G))
        self.cweight = Parameter(torch.zeros(1, channel // (2 * G), 1, 1))
        self.cbias = Parameter(torch.ones(1, channel // (2 * G), 1, 1))
        self.sweight = Parameter(torch.zeros(1, channel // (2 * G), 1, 1))
        self.sbias = Parameter(torch.ones(1, channel // (2 * G), 1, 1))
        self.sigmoid = nn.Sigmoid()

    def init_weights(self):
        """Re-initialize any Conv2d, BatchNorm2d or Linear submodules in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    @staticmethod
    def channel_shuffle(x, groups):
        """Interleave the channels of ``groups`` equal channel groups."""
        b, c, h, w = x.shape
        x = x.reshape(b, groups, -1, h, w)
        x = x.permute(0, 2, 1, 3, 4)

        # flatten
        x = x.reshape(b, -1, h, w)

        return x

    def forward(self, x):
        """Return the attended tensor, same shape as ``x``."""
        b, c, h, w = x.size()
        # group into subfeatures
        # reshape (not view) so channels_last / non-contiguous inputs work
        x = x.reshape(b * self.G, -1, h, w)  # bs*G,c//G,h,w

        # channel_split
        x_0, x_1 = x.chunk(2, dim=1)  # bs*G,c//(2*G),h,w

        # channel attention
        x_channel = self.avg_pool(x_0)  # bs*G,c//(2*G),1,1
        x_channel = self.cweight * x_channel + self.cbias  # bs*G,c//(2*G),1,1
        x_channel = x_0 * self.sigmoid(x_channel)

        # spatial attention
        x_spatial = self.gn(x_1)  # bs*G,c//(2*G),h,w
        x_spatial = self.sweight * x_spatial + self.sbias  # bs*G,c//(2*G),h,w
        x_spatial = x_1 * self.sigmoid(x_spatial)  # bs*G,c//(2*G),h,w

        # concatenate along channel axis
        out = torch.cat([x_channel, x_spatial], dim=1)  # bs*G,c//G,h,w
        out = out.reshape(b, -1, h, w)

        # channel shuffle
        out = self.channel_shuffle(out, 2)
        return out
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Locals renamed: ``input`` shadowed the builtin, ``se`` was a leftover name.)
    feats = torch.randn(50, 512, 7, 7)
    shuffle_att = ShuffleAttention(channel=512, G=8)
    print(shuffle_att(feats).shape)
================================================
FILE: cv-attention/SimAM.py
================================================
import torch
import torch.nn as nn
class SimAM(torch.nn.Module):
    """Parameter-free SimAM attention.

    Args:
        e_lambda: Constant added to the per-channel variance term in the
            denominator.
    """

    def __init__(self, e_lambda=1e-4):
        super().__init__()
        self.activaton = nn.Sigmoid()
        self.e_lambda = e_lambda

    def __repr__(self):
        return self.__class__.__name__ + '(' + ('lambda=%f)' % self.e_lambda)

    @staticmethod
    def get_module_name():
        """Return the short identifier of this module."""
        return "simam"

    def forward(self, x):
        """Gate every element of a 4-D input by the sigmoid of its energy term."""
        _, _, h, w = x.size()
        n = w * h - 1

        centered_sq = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2)
        # per-channel spatial variance estimate (divides by n = h*w - 1)
        variance = centered_sq.sum(dim=[2, 3], keepdim=True) / n
        energy = centered_sq / (4 * (variance + self.e_lambda)) + 0.5

        return x * self.activaton(energy)
if __name__ == '__main__':
    # Smoke test: print the output shape for a random batch.
    # (Local renamed so it no longer shadows the builtin ``input``.)
    feats = torch.randn(3, 64, 7, 7)
    model = SimAM()
    print(model(feats).shape)
================================================
FILE: cv-attention/TripletAttention.py
================================================
import torch
import torch.nn as nn
class BasicConv(nn.Module):
    """Conv2d optionally followed by BatchNorm2d and ReLU.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        kernel_size, stride, padding, dilation, groups, bias: Forwarded to
            ``nn.Conv2d``.
        relu: If True, append a ReLU.
        bn: If True, append a BatchNorm2d (eps=1e-5, momentum=0.01).
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True,
                 bn=True, bias=False):
        super().__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(
            in_planes, out_planes,
            kernel_size=kernel_size, stride=stride, padding=padding,
            dilation=dilation, groups=groups, bias=bias,
        )
        if bn:
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
        else:
            self.bn = None
        if relu:
            self.relu = nn.ReLU()
        else:
            self.relu = None

    def forward(self, x):
        """Apply the conv, then whichever optional stages are enabled."""
        out = self.conv(x)
        for stage in (self.bn, self.relu):
            if stage is not None:
                out = stage(out)
        return out
class ZPool(nn.Module):
    """Reduce dim 1 to two channels: its max followed by its mean."""

    def forward(self, x):
        peak = torch.max(x, 1)[0].unsqueeze(1)
        average = torch.mean(x, 1).unsqueeze(1)
        return torch.cat((peak, average), dim=1)
class AttentionGate(nn.Module):
    """Gate the input with a sigmoid map from a 7x7 conv over Z-pooled features."""

    def __init__(self):
        super().__init__()
        kernel_size = 7
        same_padding = (kernel_size - 1) // 2
        self.compress = ZPool()
        self.conv = BasicConv(2, 1, kernel_size, stride=1, padding=same_padding, relu=False)

    def forward(self, x):
        """Return ``x`` multiplied by the single-channel attention map."""
        pooled = self.compress(x)
        # sigmoid is applied in place on the freshly computed conv output
        scale = torch.sigmoid_(self.conv(pooled))
        return x * scale
class TripletAttention(nn.Module):
    """Average of attention gates applied along three axis arrangements.

    Args:
        no_spatial: If True, skip the gate applied to the unpermuted input
            and average only the two permuted branches.
    """

    def __init__(self, no_spatial=False):
        super().__init__()
        self.cw = AttentionGate()
        self.hc = AttentionGate()
        self.no_spatial = no_spatial
        if not no_spatial:
            self.hw = AttentionGate()

    def forward(self, x):
        # Branch 1: swap dims 1 and 2, gate, swap back.
        gated_cw = self.cw(x.permute(0, 2, 1, 3).contiguous())
        branch_cw = gated_cw.permute(0, 2, 1, 3).contiguous()

        # Branch 2: swap dims 1 and 3, gate, swap back.
        gated_hc = self.hc(x.permute(0, 3, 2, 1).contiguous())
        branch_hc = gated_hc.permute(0, 3, 2, 1).contiguous()

        if self.no_spatial:
            return 1 / 2 * (branch_cw + branch_hc)

        # Branch 3: gate on the original layout.
        branch_hw = self.hw(x)
        return 1 / 3 * (branch_hw + branch_cw + branch_hc)
if __name__ == '__main__':
    # Smoke test: run a random batch through the module and print the output shape.
    dummy_batch = torch.randn(50, 512, 7, 7)
    attention = TripletAttention()
    result = attention(dummy_batch)
    print(result.shape)
================================================
FILE: cv-attention/readme.md
================================================
# CV-Attention
关于CV的一些经典注意力机制代码。
目前代码格式主要用于yolov3,yolov5,yolov7,yolov8.
# Supports
| name | need_channel | paper |
| :----:| :----: | :----: |
| BAM | True | https://arxiv.org/pdf/1807.06514.pdf |
| CBAM | True | https://openaccess.thecvf.com/content_ECCV_2018/papers/Sanghyun_Woo_Convolutional_Block_Attention_ECCV_2018_paper.pdf |
| SE | True | https://arxiv.org/abs/1709.01507 |
| CoTAttention | True | https://arxiv.org/abs/2107.12292 |
| MobileViTAttention | True | https://arxiv.org/abs/2110.02178 |
| SimAM | False | http://proceedings.mlr.press/v139/yang21o/yang21o.pdf |
| SK | True | https://arxiv.org/pdf/1903.06586.pdf |
| ShuffleAttention | True | https://arxiv.org/pdf/2102.00240.pdf |
| S2Attention | True | https://arxiv.org/abs/2108.01072 |
| TripletAttention | False | https://arxiv.org/abs/2010.03045 |
| ECA | True | https://arxiv.org/pdf/1910.03151.pdf |
| ParNetAttention | True | https://arxiv.org/abs/2110.07641 |
| CoordAttention | True | https://arxiv.org/abs/2103.02907 |
| MHSA<br>Multi-Head-Self-Attention | True | https://wuch15.github.io/paper/EMNLP2019-NRMS.pdf |
| SGE | False | https://arxiv.org/pdf/1905.09646.pdf |
| A2Attention | True | https://arxiv.org/pdf/1810.11579.pdf |
| GC<br>Global Context Attention | True | https://arxiv.org/abs/1904.11492 |
| EffectiveSE<br>Effective Squeeze-Excitation | True | https://arxiv.org/abs/1911.06667 |
| GE<br>Gather-Excite Attention | True | https://arxiv.org/abs/1810.12348 |
| CrissCrossAttention | True | https://arxiv.org/abs/1811.11721 |
| Polarized Self-Attention | True | https://arxiv.org/abs/2107.00782 |
| Sequential Self-Attention | True | https://arxiv.org/abs/2107.00782 |
| GAM | True | https://arxiv.org/pdf/2112.05561v1.pdf |
| Biformer | True | https://arxiv.org/abs/2303.08810 |
| EMA | True | https://arxiv.org/abs/2305.13563v2 |
| CloAttention | True | https://arxiv.org/abs/2303.17803 |
| LSKBlock | True | https://arxiv.org/pdf/2303.09030.pdf |
| MLCA | True | https://www.sciencedirect.com/science/article/pii/S0952197623006267 |
| LSKA | True | https://arxiv.org/abs/2309.01439 |
| DAttention | True | https://openaccess.thecvf.com/content/CVPR2022/html/Xia_Vision_Transformer_With_Deformable_Attention_CVPR_2022_paper.html |
| ELA | True | https://arxiv.org/abs/2403.01123 |
| CAA | True | https://arxiv.org/pdf/2403.06258 |
| CPCA | True | https://arxiv.org/abs/2306.05196 |
# Install
安装命令:pip install timm einops efficientnet_pytorch -i https://pypi.tuna.tsinghua.edu.cn/simple
# Course
1. [yolov5添加注意力哔哩哔哩视频教学链接](https://www.bilibili.com/video/BV1s84y1775U) [yolov5添加注意力-补充事项-哔哩哔哩视频教学链接](https://www.bilibili.com/video/BV1hG4y1M71X)
2. [yolov7添加注意力哔哩哔哩视频教学链接](https://www.bilibili.com/video/BV1pd4y1H7BK)
3. [yolov8添加注意力哔哩哔哩视频教学链接](https://www.bilibili.com/video/BV1ZQ4y1J7oC/) [yolov8添加注意力进阶版哔哩哔哩视频教学链接](https://www.bilibili.com/video/BV1ZQ4y1J7oC/)
# Reference
https://github.com/xmu-xiaoma666/External-Attention-pytorch
https://github.com/rwightman/pytorch-image-models
https://github.com/rayleizhu/BiFormer
https://github.com/XiaLiPKU/EMANet
https://github.com/qhfan/CloFormer/tree/main
https://github.com/zcablii/LSKNet
https://github.com/wandahangFY/MLCA
https://github.com/StevenLauHKHK/Large-Separable-Kernel-Attention
https://github.com/LeapLabTHU/DAT
https://github.com/NUST-Machine-Intelligence-Laboratory/PKINet
https://github.com/Cuthbert-Huang/CPCANet
================================================
FILE: cvpr2025-deim-project.md
================================================
# 2025-SOTA目标检测模型项目(2026发论文必备项目)
鉴于目前YOLO系列模型反映的拒稿率越来越高且YOLO模型确实非常泛滥,无论是不是计算机专业、是不是小白都基本可以快速上手YOLO模型,导致计算机专业和有期刊级别要求的小伙伴日益难受,简单来说就是YOLO在学术界的红利已经基本吃透,目前开始越来越多人转CVPR2024-RTDETR,而且目前研究生毕业一年比一年难,不像以前随便结合点深度学习就可以毕业,就像越来越多人反馈,导师已经明确禁止不能用YOLO,再加上这么多年来YOLO对学术的灌水已经让审稿人出现视觉疲劳,带上了”有色”眼镜看待YOLO,所以结合以上众多原因,因此我们需要一个有一定上手难度且是顶会的模型来支撑我们后续的大小论文的工作。
PS:20250614版本更新后,本项目的dfine和cvpr2025-deimv1已经支持Ultralytics同款的配置文件形式,大大降低上手难度
### 1. 这个项目包含什么模型?
这个项目的源代码来自:[DEIM](https://github.com/ShihuaHuang95/DEIM)
其内部可以跑以下模型(以下模型支持目标检测,DFine、DEIM支持实例分割,不支持姿态检测、旋转目标检测):
1. CVPR2025-DEIM
2. ICLR2025-DFine
3. RTDETRV2
4. DEIMV2
选择这个课程,这些模型都可以改进,不限于DEIM,这些都是顶会的模型,不要说2025,就算是2026、2027都不落后!还有一个重点就是像CVPR2024-RTDETR,最小的模型也有50GFLOPs,但是现在的DEIM和DFine都有像YOLO一样的Nano大小版本的模型,变相降低了训练成本和设备要求!(建议最低12G显存的显卡起步)
### 2. 这个项目会以什么形式开展?
1. 这个项目跟以往区别比较大,我们其他改进项目都是直接提供好修改好的代码,用户不需要懂代码的情况下也可以开始做实验,甚至可以做完实验,但是这样也有一个不好的点,就是会大幅度降低上手门槛,这特别对计算机专业的同学来说是非常不利的,因此这个项目在代码工程方面,这个项目我们会有教程教大家怎么去调试程序、修改代码、添加模块。
2. 这个项目会**不定时(直播时间到时候会群里进行通知,没有硬性规定多久一次,不方便看的会有录播)**有**直播**,详细直播内容请看第三大点。
3. 这个项目会持续更新创新点,如果创新点是来源于现有的模型,还会提供对应的论文及其中文翻译版本(假设像FasterNet中的FasterBlock,会提供好对应的py文件、原论文及其中文翻译版本),用户可以根据从本课程学习到的缝合模块(代指第一点)去定制或者创新自己的网络。
4. 附带答疑群,答疑群主要答疑的内容是实验、代码操作、代码报错等相关问题(经过YOLO、RTDETR大量的经验,我没法保证每一个问题都能回复到大家,只能保证遇到过的问题会给大家提供建议和方向,当然群内的一些高频问题,我也会收集起来挑出部分出视频或者直播给大家进行解答)。
5. 如果后续有剪枝、蒸馏,不需要额外付费,本项目会包含在内,所以性价比真的非常高,YOLO改进剪枝蒸馏三件套也要200多了。
### 3. 直播内容
1. 解答群内一些高频疑问,比如很多人都会遇到的报错、或者注意点。
2. 教大家如何去做二次创新(PS:这个不是口头给大家说怎么二次创新,而是从代码的层面带大家去实践二次创新。可能这里会有同学问,那自研创新呢?你会自研模块的前提是必须要懂如何二次创新,首先这是一个过程,然后我有很多自研模块是突然有的想法或者看论文看到某些结构与之前看到的论文联合后有新的想法,所以也很难描述我为什么就想到这个结构,大多数情况下,只需要会有一定复杂度的二次创新就足够,当然自研模块有机会我也会去讲)
3. 给大家从浅到深解说一些我认为比较经典的模块,提高自己能创新新模块的能力和基础,因为很多模块都是相通的,本质没有变,只是模块上的组合体替换。(有不少人私聊我说,能不能出些你是如何结合一些现有的模块去创新的,虽然现在B站上也有不少讲创新点的,但是他们的感觉就是从头到尾读一篇代码,我看了几次之后觉得我把代码扔给GPT给我打上注释的感觉是一样的,看的时候感觉哦哦哦这样,看完后就不知所然)
### 3. 入手本项目需要注意些什么?
1. 因为本项目完全不是像之前YOLO项目这样傻瓜式操作,所以本项目有一定难度,具有以下特征的小伙伴不建议入手。(看到这里可能有人会问,为什么不考虑把DEIM、DFine、RTDETRV2都移植到Ultralytics?因为这个不确定性太大,DETR类型的模型对参数非常敏感,可能有一点参数不合适,效果就会大打折扣,但是对于这种较为复杂的模型移植过程中又很难保证一比一全过程移植)
- 未入门、100%纯小白(如果你有心学,这个不是问题)
- 不太想花太多时间去学,搞这个只是想为了水个无要求的论文就行
- 没有任何解决问题的能力(如果你有心学,这个不是问题)
- 从来不看使用文档、说明之类的(强烈不建议入手)
- 此项目上手需要时间,如果想无脑直接跑就不合适购入
最后补充!如果你具有以上特征,但又要求期刊不能太水或者不能做yolo的问题,尽早入手CVPR2024-RTDETR吧,去年没抓上,今年不能再等了,模型红利可不等人。
2. 入手前可以先去B站看一下[CVPR2025-DEIM合集里面的教程](https://space.bilibili.com/286900343/lists/4909499),最起码先跑通DEIM原始模型,能跟着视频训练和测试,然后也把合集里面的基础课程都先看一下,为后面打好基础。
3. 我认为这个不是什么不可达到的事,就看你想不想毕业了,有志者事竟成。
PS:20250614版本更新后,本项目的dfine和deim已经支持Ultralytics同款的配置文件形式,大大降低上手难度
### 4. 价格
1. 本项目价格为288,没有时效限制。(与其150、200买个YOLO纯模型改进专栏,不如288买个2025-SOTA专栏,最起码不用怕花了钱,最后做的YOLO还投不出去,还毕不了业)
2. 虚拟项目一经售出不退不换,需要入手前考虑清楚,如果你是初次入手我的项目,怕我不靠谱,可以先考虑入手个YOLO和RTDETR看下。
### 5. 项目使用问题
1. 购买本项目的使用者都会得到一个独一无二的用于解压7z的密码,到时候用于解压对应的压缩包,此密码自己妥善保管,请勿告诉他人。
2. 本项目的视频和直播回放统一都是加密视频,每个购买者都可以得到一个激活码,激活码在每个人专属的7z压缩文件内。
### 6. 项目更新公告
- 20250330
1. 初版项目发布.
- 20250413
1. 新增多个改进模块并新增模块简介,位置在engine/extre_module/module_images内。
2. 新增训练和测试阶段的进度条显示。
3. 优化tensorboard中的精度名称显示。
4. 优化输出,把重要信息换颜色显示。
5. 新增plot_train_batch_freq参数,用于控制间隔多少epoch保存第一个batch中的数据增强后的图像,默认为12。
6. 新增保存当前参数信息,会自动保存到output_dir中的args.json文件内。
7. 优化output_dir保存逻辑,当判断output_dir路径存在的时候,会自动在后缀加1,避免覆盖原先代码。
- 20250419
1. 新增verbose_type参数,用于控制使用默认还是进度条输出,默认为官方默认输出形式。
2. 新增thop计算模型计算量方式,避免calflops对于部分算子出现不支持报错的操作。
3. 完善每个模块的py文件,增加输出计算量和参数量等数值,方便用户后续调试。
4. 给DataLoader中添加pin_memory参数为True,可以在训练时候如果是数据加载成为瓶颈,可以提高速度。
5. 修复用户反馈的已知问题。
6. 新增多个改进模块。
- 20250429
1. 修复engine/extre_module/custom_nn/attention/SEAM.py模块,应该是MutilSEAM。
2. 新增一些进阶课程的视频。
3. 新增多个改进模块。
4. 修复用户反馈的已知问题。
5. 修复续训时候会新增一个保存路径的问题。
6. 修复多卡训练Stage2的时候会出现部分进程找不到权重文件的问题。
- 20250514
1. 新增一些进阶课程的视频。
2. 新增多个改进模块。
3. 修复用户反馈的已知问题。
- 20250526
1. 新增一些进阶课程的视频。
2. 新增多个改进模块。
3. 新增cache_ram参数,详细可以看userguide。
4. 修复在torch2.7.0下出现的NotImplementedError问题。
- 20250609
1. 修复新增了cache_ram功能后训练COCO数据集精度不正常的问题。
2. 修复在训练COCO数据集中数据增强的绘制BUG。
3. 新增多个改进模块。
4. 新增一些进阶课程的视频。
5. 修复用户反馈的已知问题。
- 20250614
1. 新增Ultralytics的配置文件方式,大大降低改进难度。
2. 新增一些<Ultralytics的配置文件方式>进阶课程的视频。
3. 新增多个改进模块。
- 20250617
1. 修复配置文件中层序号有误的问题。
- 20250619
1. 修复配置文件中层序号有误的问题。
2. 新增多个改进模块。
3. 新增一些<Ultralytics的配置文件方式>进阶课程的视频。
- 20250625
1. 修复best_stg2保存异常的问题。
2. 新增YOLOV13中的HyperACE模块。
3. 新增多个关于<Ultralytics的配置文件方式>进阶课程的视频。
- 20250705
1. 新增多个改进模块。
2. 新增多个关于<Ultralytics的配置文件方式>进阶课程的视频。
3. 新增20250704基础疑问解答直播回放链接。
- 20250714
1. 新增多个改进模块。
2. 新增多个关于<Ultralytics的配置文件方式>进阶课程的视频。
3. 新增小目标检测网络架构专题一群课题直播回放。
- 20250726
1. 新增在test-only的状态下输出每个类别的'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'。
2. 新增多个改进模块。
3. 修复用户反馈的已知问题。
4. 新增一个JSON格式数据集脚本。(输出类别数和类别id、输出每个类别的实例数量)
- 20250817
1. 新增支持蒸馏学习,蒸馏学习支持断点续训使用方法跟正常训练一样。
2. 蒸馏学习支持特征蒸馏、逻辑蒸馏、特征+逻辑蒸馏 这三种方式。
3. 无论是Ultralytics配置文件方式、还是原始的代码方式都支持相互蒸馏。
4. 蒸馏学习支持控制epoch,例如只有前50epoch进行蒸馏学习,后50epoch关闭蒸馏学习。
5. 更多细节请看关于<知识蒸馏教学视频>的进阶课程。
6. 支持输出YOLO指标(Precision、Recall、F1-Score、mAP50、mAP75、mAP50-95),详细请看userguide。
7. 新增多个改进模块。
8. 新增小目标检测网络架构专题二链接。
- 20250823
1. 修复YOLO指标在一些图片没真实标签的时候报错的bug。
2. 开放逻辑蒸馏,在项目内有对应的课程。
3. 新增多个改进模块。
4. 新增<知识蒸馏教学视频>的进阶课程。
- 20250907
1. 新增多个改进模块。
2. 修复蒸馏学习中教师信息输出错误的问题。
- 20250921
1. 新增导出脚本(export.py),支持导出onnx、tensorrt模型。
2. 重构大部分输出,增加输出对应的时间、文件、函数、行数,以便用户快速定位。
3. 新增20250910直播回放链接。
4. 修复一些已知BUG。
5. 完善onnx、tensorrt模型推理脚本。
6. 支持在train.py test-only状态下中使用onnx、tensorrt模型进行验证。
7. 新增<模型导出>相关教程视频。
8. 新增多个改进模块。
9. 支持DINOV3(ConvNext、ViT)作为主干进行微调。<教程在百度云创新课题的第五点>
- 20251012
1. 移植DEIMV2到本项目,暂只支持原始的代码修改方式。
2. 更新UserGuide。
3. 新增<DEIMV2说明视频>。
4. 修复一些已知问题。
- 20251025
1. 新增DQ-DETR的模块。
2. 新增多个改进模块。
3. 新增<DQ-DETR改进点>的相关教程视频。
4. 修复一些已知问题。
- 20251102
1. 新增<DQ-DETR改进点>的相关教程视频。
2. 修复一些已知问题。
- 20251115
1. 新增以DensityMap为主导的创新课程[DFINE with Density-aware Query Selection]。
2. 修复一些已知问题。
- 20251207
1. 新增在test-only状态下,yolo-metrice支持保存混淆矩阵。
2. 新增DFine、DEIM实例分割的实现,使用相关请看进阶教程实例分割部分。
3. 更新dataset/coco_analyzer.py脚本,支持输出数据集中更多的内容,以便分析数据集的特点。
4. 新增tools/visualization/tp_fp_fn_analysis.py脚本,用于分析检测结果中的tp、fp、fn。
5. 新增多个改进模块。
6. 修复一些已知问题。
7. 新增<TGRS2025-HighFrequencyDirectionInjection创新思想课程>。
8. 新增基于ByteTrack的目标跟踪,教程请看进阶教程内的<目标跟踪ByteTrack的使用教程>。
- 20251213
1. 参考CVPR2022-MaskDINO重构实例分割检测头代码。
2. 修复在ram_cache状态下实例分割数据集部分存在的BUG。
3. 重新录制实例分割部分的进阶视频。
- 20251224
1. 新增多个改进模块。
2. 修复实例分割部分已知的问题。
3. 新增以DensityMap为主导的实例分割检测头内容[DFINESeg with Density-aware Query Selection]。
4. 新增[DFINESeg with Density-aware Query Selection]的使用视频教程。
5. 更新实例分割实现讲解。
- 20251226
1. 修复一些已知问题。
2. 新增基于COCO-Tiny指标,并支持输出每类COCO-Tiny指标,详细请看UserGuide.md中的<项目内yml一些额外参数说明>。
- 20260109
1. 修复一些已知问题。
2. 新增<ES-MoE>动态路由网络模块。
3. 更新视频链接。
- 20260128
1. 修复一些已知问题。
2. 新增多个改进模块。
3. 新增<ES-MoE>动态路由网络教程视频。
4. 新增<TPAMI2025 YOLO-MS>的MSBlock和GQL的教程视频。
- 20260224
1. 修复一些已知问题。
2. 新增多个改进模块。
3. compile_module的编译模块支持50系显卡。
4. 为了兼容50系用户,新版的环境统一修改成torch2.8.0,旧版本的用户不影响。
- 20260310
1. 新增diou, ciou, eiou, siou, shapeiou, piou, piou2。
2. 支持TIMM中的主干进行训练。
3. DINOV3版本支持Ultralytics版本训练。
4. 新增AAAI2026-SPJFB模块。
5. 新增TGRS2025-GLSS2D模块。
6. 新增TIP2025-CAFM模块。
7. 新增TIP2025-DWM_MSA模块。
8. 新增DynamicERF模块。
9. 新增如何使用其他IOU的操作视频。
10. 新增TIMM主干的操作视频。
11. yolo_metrice参数从默认为False改为True,代表训练过程中YOLO和COCO指标都会一并输出。
### 7. 目前已有的模块
- engine/extre_module/custom_nn/attention
1. engine/extre_module/custom_nn/attention/SEAM.py
2. CVPR2021|engine/extre_module/custom_nn/attention/ca.py
3. ICASSP2023|engine/extre_module/custom_nn/attention/ema.py
4. ICML2021|engine/extre_module/custom_nn/attention/simam.py
5. ICCV2023|engine/extre_module/custom_nn/attention/lsk.py
6. WACV2024|engine/extre_module/custom_nn/attention/DeformableLKA.py
7. engine/extre_module/custom_nn/attention/mlca.py
8. BIBM2024|engine/extre_module/custom_nn/attention/FSA.py
9. AAAI2025|engine/extre_module/custom_nn/attention/CDFA.py
10. engine/extre_module/custom_nn/attention/GLSA.py
11. TGRS2025|engine/extre_module/custom_nn/attention/MCA.py
12. CVPR2025|engine/extre_module/custom_nn/attention/CASAB.py
13. NN2025|engine/extre_module/custom_nn/attention/KSFA.py
14. TPAMI2025|engine/extre_module/custom_nn/attention/GQL.py
15. TGRS2025|engine/extre_module/custom_nn/attention/ACA.py
16. TGRS2025|engine/extre_module/custom_nn/attention/DHPF.py
17. TGRS2025|engine/extre_module/custom_nn/attention/ACAB.py
- engine/extre_module/custom_nn/block
1. engine/extre_module/custom_nn/block/RepHMS.py
2. 自研模块|engine/extre_module/custom_nn/block/rgcspelan.py
3. TPAMI2025|engine/extre_module/custom_nn/block/MANet.py
- engine/extre_module/custom_nn/conv_module
1. CVPR2021|engine/extre_module/custom_nn/conv_module/dbb.py
2. IEEETIP2024|engine/extre_module/custom_nn/conv_module/deconv.py
3. ICCV2023|engine/extre_module/custom_nn/conv_module/dynamic_snake_conv.py
4. CVPR2023|engine/extre_module/custom_nn/conv_module/pconv.py
5. AAAI2025|engine/extre_module/custom_nn/conv_module/psconv.py
6. CVPR2025|engine/extre_module/custom_nn/conv_module/ShiftwiseConv.py
7. engine/extre_module/custom_nn/conv_module/wdbb.py
8. engine/extre_module/custom_nn/conv_module/deepdbb.py
9. ECCV2024|engine/extre_module/custom_nn/conv_module/wtconv2d.py
10. CVPR2023|engine/extre_module/custom_nn/conv_module/ScConv.py
11. engine/extre_module/custom_nn/conv_module/dcnv2.py
12. CVPR2024|engine/extre_module/custom_nn/conv_module/DilatedReparamConv.py
13. engine/extre_module/custom_nn/conv_module/gConv.py
14. CVPR2024|engine/extre_module/custom_nn/conv_module/IDWC.py
15. engine/extre_module/custom_nn/conv_module/DSA.py
16. CVPR2025|engine/extre_module/custom_nn/conv_module/FDConv.py
17. CVPR2023|engine/extre_module/custom_nn/conv_module/dcnv3.py
18. CVPR2024|engine/extre_module/custom_nn/conv_module/dcnv4.py
19. CVPR2024|engine/extre_module/custom_nn/conv_module/DynamicConv.py
20. CVPR2024|engine/extre_module/custom_nn/conv_module/FADC.py
21. CVPR2023|engine/extre_module/custom_nn/conv_module/SMPConv.py
22. MIA2025|engine/extre_module/custom_nn/conv_module/FourierConv.py
23. CVPR2024|engine/extre_module/custom_nn/conv_module/SFSConv.py
24. ICCV2025|engine/extre_module/custom_nn/conv_module/MBRConv.py
25. ICCV2025|engine/extre_module/custom_nn/conv_module/ConvAttn.py
26. ICCV2025|engine/extre_module/custom_nn/conv_module/Converse2D.py
27. CVPR2025|engine/extre_module/custom_nn/conv_module/gcconv.py
28. ACCV2024|engine/extre_module/custom_nn/conv_module/RMBC.py
- engine/extre_module/custom_nn/upsample
1. CVPR2024|engine/extre_module/custom_nn/upsample/eucb.py
2. CVPR2024|engine/extre_module/custom_nn/upsample/eucb_sc.py
3. engine/extre_module/custom_nn/upsample/WaveletUnPool.py
4. ICCV2019|engine/extre_module/custom_nn/upsample/CARAFE.py
5. ICCV2023|engine/extre_module/custom_nn/upsample/DySample.py
6. ICCV2025|engine/extre_module/custom_nn/upsample/Converse2D_Up.py
7. CVPR2025|engine/extre_module/custom_nn/upsample/DSUB.py
- engine/extre_module/custom_nn/downsample
1. IEEETIP2020|engine/extre_module/custom_nn/downsample/gcnet.py
2. 自研模块|engine/extre_module/custom_nn/downsample/lawds.py
3. engine/extre_module/custom_nn/downsample/WaveletPool.py
4. engine/extre_module/custom_nn/downsample/ADown.py
5. engine/extre_module/custom_nn/downsample/YOLOV7Down.py
6. engine/extre_module/custom_nn/downsample/SPDConv.py
7. engine/extre_module/custom_nn/downsample/HWD.py
8. engine/extre_module/custom_nn/downsample/DRFD.py
9. TGRS2025|engine/extre_module/custom_nn/conv_module/FSConv.py
- engine/extre_module/custom_nn/stem
1. engine/extre_module/custom_nn/stem/SRFD.py
2. engine/extre_module/custom_nn/stem/LoG.py
3. ICCV2023|engine/extre_module/custom_nn/stem/RepStem.py
- engine/extre_module/custom_nn/featurefusion
1. 自研模块|engine/extre_module/custom_nn/featurefusion/cgfm.py
2. BMVC2024|engine/extre_module/custom_nn/featurefusion/msga.py
3. CVPR2024|engine/extre_module/custom_nn/featurefusion/mfm.py
4. IEEETIP2023|engine/extre_module/custom_nn/featurefusion/CSFCN.py
5. BIBM2024|engine/extre_module/custom_nn/featurefusion/mpca.py
6. ACMMM2024|engine/extre_module/custom_nn/featurefusion/wfu.py
7. CVPR2025|engine/extre_module/custom_nn/featurefusion/GDSAFusion.py
8. engine/extre_module/custom_nn/featurefusion/PST.py
9. TGRS2025|engine/extre_module/custom_nn/featurefusion/MSAM.py
10. INFFUS2025|engine/extre_module/custom_nn/featurefusion/DPCF.py
11. CVPR2025|engine/extre_module/custom_nn/featurefusion/LCA.py
12. TGRS2025|engine/extre_module/custom_nn/featurefusion/HFFE.py
13. TGRS2025|engine/extre_module/custom_nn/featurefusion/MFPM.py
14. TGRS2025|engine/extre_module/custom_nn/featurefusion/ERM.py
15. TIP2025|engine/extre_module/custom_nn/featurefusion/CAFM.py
- engine/extre_module/custom_nn/module
1. AAAI2025|engine/extre_module/custom_nn/module/APBottleneck.py
2. CVPR2025|engine/extre_module/custom_nn/module/efficientVIM.py
3. CVPR2023|engine/extre_module/custom_nn/module/fasterblock.py
4. CVPR2024|engine/extre_module/custom_nn/module/starblock.py
5. engine/extre_module/custom_nn/module/DWR.py
6. CVPR2024|engine/extre_module/custom_nn/module/UniRepLKBlock.py
7. CVPR2025|engine/extre_module/custom_nn/module/mambaout.py
8. AAAI2024|engine/extre_module/custom_nn/module/DynamicFilter.py
9. engine/extre_module/custom_nn/module/StripBlock.py
10. TGRS2024|engine/extre_module/custom_nn/module/elgca.py
11. CVPR2024|engine/extre_module/custom_nn/module/LEGM.py
12. ICCV2023|engine/extre_module/custom_nn/module/iRMB.py
13. TPAMI2025|engine/extre_module/custom_nn/module/MSBlock.py
14. ICLR2024|engine/extre_module/custom_nn/module/FATBlock.py
15. CVPR2024|engine/extre_module/custom_nn/module/MSCB.py
16. engine/extre_module/custom_nn/module/LEGBlock.py
17. CVPR2025|engine/extre_module/custom_nn/module/RCB.py
18. ECCV2024|engine/extre_module/custom_nn/module/JDPM.py
19. CVPR2025|engine/extre_module/custom_nn/module/vHeat.py
20. CVPR2025|engine/extre_module/custom_nn/module/EBlock.py
21. CVPR2025|engine/extre_module/custom_nn/module/DBlock.py
22. ECCV2024|engine/extre_module/custom_nn/module/FMB.py
23. CVPR2024|engine/extre_module/custom_nn/module/IDWB.py
24. ECCV2022|engine/extre_module/custom_nn/module/LFE.py
25. AAAI2025|engine/extre_module/custom_nn/module/FCM.py
26. CVPR2024|engine/extre_module/custom_nn/module/RepViTBlock.py
27. CVPR2024|engine/extre_module/custom_nn/module/PKIModule.py
28. CVPR2024|engine/extre_module/custom_nn/module/camixer.py
29. ICCV2025|engine/extre_module/custom_nn/module/ESC.py
30. CVPR2025|engine/extre_module/custom_nn/module/nnWNet.py
31. TGRS2025|engine/extre_module/custom_nn/module/ARF.py
32. AAAI2024|engine/extre_module/custom_nn/module/CFBlock.py
33. IJCV2024|engine/extre_module/custom_nn/module/FMA.py
34. engine/extre_module/custom_nn/module/LWGA.py
35. TGRS2025|engine/extre_module/custom_nn/module/CSSC.py
36. TGRS2025|engine/extre_module/custom_nn/module/CNCM.py
37. ICCV2025|engine/extre_module/custom_nn/module/HFRB.py
38. ICIP2025|engine/extre_module/custom_nn/module/EVA.py
39. CVPR2025|engine/extre_module/custom_nn/module/IEL.py
40. MICCAI2023|engine/extre_module/custom_nn/module/MFEBlock.py
41. AAAI2026|engine/extre_module/custom_nn/module/PartialNetBlock.py
42. TGRS2025|engine/extre_module/custom_nn/module/DRG.py
43. engine/extre_module/custom_nn/module/Wave2D.py
44. TGRS2025|engine/extre_module/custom_nn/module/GLGM.py
45. TGRS2025|engine/extre_module/custom_nn/module/MAC.py
46. AAAI2026|engine/extre_module/custom_nn/module/SPJFB.py
- engine/extre_module/custom_nn/neck
1. 自研模块|engine/extre_module/custom_nn/neck/FDPN.py
- engine/extre_module/custom_nn/neck_module
1. TPAMI2025|engine/extre_module/custom_nn/neck_module/HyperCompute.py
2. engine/extre_module/custom_nn/neck_module/HyperACE.py
3. engine/extre_module/custom_nn/neck_module/GoldYOLO.py
4. AAAI2025|engine/extre_module/custom_nn/neck_module/HS_FPN.py
- engine/extre_module/custom_nn/norm
1. ICML2024|engine/extre_module/custom_nn/transformer/repbn.py
2. CVPR2025|engine/extre_module/custom_nn/transformer/dyt.py
3. engine/extre_module/custom_nn/norm/derf.py
- engine/extre_module/custom_nn/transformer
1. ICLR2025|engine/extre_module/custom_nn/transformer/PolaLinearAttention.py
2. CVPR2023|engine/extre_module/custom_nn/transformer/biformer.py
3. CVPR2023|engine/extre_module/custom_nn/transformer/CascadedGroupAttention.py
4. CVPR2022|engine/extre_module/custom_nn/transformer/DAttention.py
5. ICLR2022|engine/extre_module/custom_nn/transformer/DPBAttention.py
6. CVPR2024|engine/extre_module/custom_nn/transformer/AdaptiveSparseSA.py
7. engine/extre_module/custom_nn/transformer/GSA.py
8. engine/extre_module/custom_nn/transformer/RSA.py
9. ECCV2024|engine/extre_module/custom_nn/transformer/FSSA.py
10. AAAI2025|engine/extre_module/custom_nn/transformer/DilatedGCSA.py
11. AAAI2025|engine/extre_module/custom_nn/transformer/DilatedMWSA.py
12. CVPR2024|engine/extre_module/custom_nn/transformer/SHSA.py
13. IJCAI2024|engine/extre_module/custom_nn/transformer/CTA.py
14. IJCAI2024|engine/extre_module/custom_nn/transformer/SFA.py
15. engine/extre_module/custom_nn/transformer/MSLA.py
16. ACMMM2025|engine/extre_module/custom_nn/transformer/CPIA_SA.py
17. NN2025|engine/extre_module/custom_nn/transformer/TokenSelectAttention.py
18. CVPR2025|engine/extre_module/custom_nn/transformer/TAB.py
19. TPAMI2025|engine/extre_module/custom_nn/transformer/LRSA.py
20. ICCV2025|engine/extre_module/custom_nn/transformer/MALA.py
21. ICML2023|engine/extre_module/custom_nn/transformer/MUA.py
22. ACMMM2025|engine/extre_module/custom_nn/transformer/EGSA.py
23. ACMMM2025|engine/extre_module/custom_nn/transformer/SWSA.py
24. AAAI2026|engine/extre_module/custom_nn/transformer/DHOGSA.py
25. NeurIPS2025|engine/extre_module/custom_nn/transformer/CBSA.py
26. TGRS2025|engine/extre_module/custom_nn/transformer/DPWA.py
27. TIP2025|engine/extre_module/custom_nn/transformer/DWM_MSA.py
- engine/extre_module/custom_nn/mlp
1. CVPR2024|engine/extre_module/custom_nn/mlp/ConvolutionalGLU.py
2. IJCAI2024|engine/extre_module/custom_nn/mlp/DFFN.py
3. ICLR2024|engine/extre_module/custom_nn/mlp/FMFFN.py
4. CVPR2024|engine/extre_module/custom_nn/mlp/FRFN.py
5. ECCV2024|engine/extre_module/custom_nn/mlp/EFFN.py
6. WACV2025|engine/extre_module/custom_nn/mlp/SEFN.py
7. ICLR2025|engine/extre_module/custom_nn/mlp/KAN.py
8. CVPR2025|engine/extre_module/custom_nn/mlp/EDFFN.py
9. IJCV2024|engine/extre_module/custom_nn/mlp/DML.py
10. AAAI2026|engine/extre_module/custom_nn/mlp/DIFF.py
- engine/extre_module/custom_nn/mamba
1. AAAI2025|engine/extre_module/custom_nn/mamba/SS2D.py
2. CVPR2025|engine/extre_module/custom_nn/mamba/ASSM.py
3. CVPR2025|engine/extre_module/custom_nn/mamba/SAVSS.py
4. CVPR2025|engine/extre_module/custom_nn/mamba/MobileMamba/mobilemamba.py
5. CVPR2025|engine/extre_module/custom_nn/mamba/MaIR.py
6. TGRS2025|engine/extre_module/custom_nn/mamba/GLVSS.py
7. ICCV2025|engine/extre_module/custom_nn/mamba/VSSD.py
8. ICCV2025|engine/extre_module/custom_nn/mamba/TinyViM.py
9. INFFUS2025|engine/extre_module/custom_nn/mamba/CSI.py
10. TIP2025|engine/extre_module/custom_nn/mamba/SFMB.py
11. TGRS2025|engine/extre_module/custom_nn/mamba/GLSS.py
12. TGRS2025|engine/extre_module/custom_nn/mamba/GLSS2D.py
- engine/extre_module/custom_nn/moe
1. engine/extre_module/custom_nn/moe/moe_module.py
- engine/extre_module/custom_nn/featurepreprocess
1. TGRS2025|engine/extre_module/custom_nn/featurepreprocess/FAENet.py
- 积木模块,示例教程engine/extre_module/custom_nn/module/example.py
1. YOLOV5|C3
2. YOLOV8|C2f
3. YOLO11|C3k2
4. TPAMI2025|MANet
5. TPAMI2024|MetaFormer_Block
6. TPAMI2024+CVPR2025|MetaFormer_Mona
7. TPAMI2024+CVPR2025+WACV2025|MetaFormer_SEFN
8. TPAMI2024+CVPR2025+WACV2025|MetaFormer_Mona_SEFN
- 创新课程代码<标识着是那个课程中的代码,详细可以去看对应的课程视频>
1. 顶会中的Partial创新思想课程|engine/extre_module/innovate/CVPR2020_GhostConv.py
2. 顶会中的Partial创新思想课程|engine/extre_module/innovate/CVPR2023_PartialConv.py
3. CVPR2025-MobileMamba中的Long-Range WTB-Mamba二次创新|engine/extre_module/innovate/CVPR2025_MobileMamba.py
4. TGRS2025-HighFrequencyDirectionInjection创新思想课程|engine/extre_module/innovate/TGRS2025_HFDI.py
================================================
FILE: damo-yolo/Annotations/ReadMe.md
================================================
# 存放VOC标注格式的文件夹
================================================
FILE: damo-yolo/JPEGImages/ReadMe.md
================================================
# 存放图像的文件夹
================================================
FILE: damo-yolo/readme.md
================================================
# DAMO-YOLO的数据集处理文件
本目录下的脚本是针对DAMO-YOLO的数据集处理脚本,支持如下:
1. VOC标注格式转换为COCO标注格式,并生成train.json,val.json,test.json.
# 使用方法
1. 把图片存放在JPEGImages中,图片后缀需要一致,比如都是jpg或者png等等,不支持混合的图片后缀格式,比如一些是jpg,一些是png。
2. 把VOC标注格式的XML文件存放在Annotations中。
3. 运行voc2coco.py,其中postfix参数是JPEGImages的图片后缀,train_ratio是训练集的比例,val_ratio是验证集的比例,剩下的就是测试集的比例。
================================================
FILE: damo-yolo/voc2coco.py
================================================
import os
import glob
import json
import shutil
import numpy as np
import xml.etree.ElementTree as ET
# First annotation id handed out by convert(); the id grows by one per bounding box written.
START_BOUNDING_BOX_ID = 1
def find_classes(path):
    """Collect the distinct object class names found in the XML files under *path*.

    Every directory entry is parsed as XML and the text of each
    ``<object>/<name>`` element is recorded once, in order of first
    appearance. Entries that cannot be opened or parsed are reported on
    stdout and skipped (best effort).

    Args:
        path: Directory containing the annotation files.

    Returns:
        List of class names without duplicates.
    """
    classes = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # resulting class order (and the ids derived from it) reproducible.
    for file_name in sorted(os.listdir(path)):
        xml_path = os.path.join(path, file_name)
        try:
            # The context manager closes the handle even when parsing fails.
            with open(xml_path, encoding='utf-8') as in_file:
                root = ET.parse(in_file).getroot()
            for obj in root.iter('object'):
                cls = obj.find('name').text
                if cls not in classes:
                    classes.append(cls)
        except Exception as e:
            # Deliberately broad: one unreadable file must not abort the scan.
            print(xml_path, e)
    return classes
def get(root, name):
    """Return the list of elements under *root* that match *name* (may be empty)."""
    matches = root.findall(name)
    return matches
def get_and_check(root, name, length):
    """Find the elements under *root* matching *name* and validate how many there are.

    Args:
        root: Element to search in.
        name: Tag / path passed to ``findall``.
        length: Expected number of matches; values <= 0 disable the count check.

    Returns:
        The single matching element when ``length == 1``, otherwise the list of matches.

    Raises:
        NotImplementedError: If nothing matches, or the count differs from a positive *length*.
    """
    found = root.findall(name)
    count = len(found)
    if not count:
        raise NotImplementedError('Can not find %s in %s.'%(name, root.tag))
    if length > 0 and count != length:
        raise NotImplementedError('The size of %s is supposed to be %d, but is %d.'%(name, length, count))
    return found[0] if length == 1 else found
def convert(xml_list, json_file):
    """Convert VOC XML annotation files into a single COCO-style JSON file.

    Reads the module-level names ``pre_define_categories`` (name -> id),
    ``postfix`` (image extension) and ``only_care_pre_define_categories``,
    which the ``__main__`` section of this script sets before calling.

    Args:
        xml_list: Iterable of XML file paths; the position of a file in
            this iterable becomes its ``image_id``.
        json_file: Path of the JSON file to write.

    Raises:
        NotImplementedError: From ``get_and_check`` when a required XML
            element is missing or appears an unexpected number of times.
    """
    json_dict = {"info":['none'], "license":['none'], "images": [], "annotations": [], "categories": []}
    categories = pre_define_categories.copy()
    bnd_id = START_BOUNDING_BOX_ID
    all_categories = {}
    for index, line in enumerate(xml_list):
        xml_f = line
        root = ET.parse(xml_f).getroot()

        # The image name is the XML file's stem plus the configured extension.
        filename = os.path.splitext(os.path.basename(xml_f))[0] + f".{postfix}"
        image_id = index
        size = get_and_check(root, 'size', 1)
        width = int(get_and_check(size, 'width', 1).text)
        height = int(get_and_check(size, 'height', 1).text)
        image = {'file_name': filename, 'height': height, 'width': width, 'id':image_id}
        json_dict['images'].append(image)

        # Segmentation is currently not supported; every annotation gets an empty list.
        for obj in get(root, 'object'):
            category = get_and_check(obj, 'name', 1).text
            all_categories[category] = all_categories.get(category, 0) + 1
            if category not in categories:
                if only_care_pre_define_categories:
                    continue
                new_id = len(categories) + 1
                print("[warning] category '{}' not in 'pre_define_categories'({}), create new id: {} automatically".format(category, pre_define_categories, new_id))
                categories[category] = new_id
            category_id = categories[category]

            bndbox = get_and_check(obj, 'bndbox', 1)
            xmin, ymin, xmax, ymax = (
                int(float(get_and_check(bndbox, tag, 1).text))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            # Annotations with swapped corners are tolerated: the size uses abs()
            # and the origin uses min(), so the box still starts at its
            # top-left corner as the [x, y, w, h] layout requires.
            o_width = abs(xmax - xmin)
            o_height = abs(ymax - ymin)
            left = min(xmin, xmax)
            top = min(ymin, ymax)
            ann = {'area': o_width*o_height, 'iscrowd': 0, 'image_id':
                   image_id, 'bbox':[left, top, o_width, o_height],
                   'category_id': category_id, 'id': bnd_id, 'ignore': 0,
                   'segmentation': []}
            json_dict['annotations'].append(ann)
            bnd_id = bnd_id + 1

    for cate, cid in categories.items():
        cat = {'supercategory': 'none', 'id': cid, 'name': cate}
        json_dict['categories'].append(cat)

    # Serialise before opening the target so a failure cannot leave an empty
    # file behind; the context manager guarantees the handle is closed.
    json_str = json.dumps(json_dict)
    with open(json_file, 'w') as json_fp:
        json_fp.write(json_str)

    print("------------create {} done--------------".format(json_file))
    print("find {} categories: {} -->>> your pre_define_categories {}: {}".format(len(all_categories), all_categories.keys(), len(pre_define_categories), pre_define_categories.keys()))
    print("category: id --> {}".format(categories))
    print(categories.keys())
    print(categories.values())
if __name__ == '__main__':
    # Image file extension used to build each COCO 'file_name'.
    postfix = 'jpg'
    # Folder holding the XML annotation files.
    xml_dir = './datasets/Annotations'
    # Output JSON files for the training, validation and test splits.
    save_json_train = './datasets/train.json'
    save_json_val = './datasets/val.json'
    save_json_test = './datasets/test.json'
    # Class names; for several classes list them all, e.g. ['dog', 'person', 'cat'].
    classes = []
    # Whether to scan every XML file first to discover the classes.
    get_data_classes = True
    # Whether to keep only the categories listed in `classes`.
    only_care_pre_define_categories = False

    if get_data_classes:
        classes = find_classes(xml_dir)
        only_care_pre_define_categories = False

    # Category ids are 1-based and follow the order of `classes`.
    pre_define_categories = {cls: idx for idx, cls in enumerate(classes, start=1)}
    print(pre_define_categories)

    # Split ratios: training and validation; whatever remains is the test split.
    train_ratio = 0.7
    val_ratio = 0.1

    print('xml_dir is {}'.format(xml_dir))
    xml_list = np.sort(glob.glob(xml_dir + "/*.xml"))
    # Fixed seed so the shuffled split is the same on every run.
    np.random.seed(100)
    np.random.shuffle(xml_list)

    total = len(xml_list)
    train_num = int(total*train_ratio)
    val_num = int(total*val_ratio)
    print('训练样本数目是 {}'.format(train_num))
    print('验证样本数目是 {}'.format(val_num))
    print('测试样本数目是 {}'.format(total - train_num - val_num))

    # Layout of the shuffled list: [validation | training | test].
    split_end = train_num + val_num
    xml_list_val = xml_list[:val_num]
    xml_list_train = xml_list[val_num:split_end]
    xml_list_test = xml_list[split_end:]

    # Convert each split to COCO format: training, then validation, then test.
    for split_files, target_json in (
        (xml_list_train, save_json_train),
        (xml_list_val, save_json_val),
        (xml_list_test, save_json_test),
    ):
        convert(split_files, target_json)
================================================
FILE: data-offline-aug/object_detection_data_aug.py
================================================
import warnings
warnings.filterwarnings('ignore')
import os, shutil, cv2, tqdm
import numpy as np
import albumentations as A
from PIL import Image
from multiprocessing import Pool
from typing import Callable, Dict, List, Union
# https://github.com/albumentations-team/albumentations
# https://albumentations.ai/docs/api_reference/augmentations/geometric/transforms/#geometric-transforms-augmentationsgeometrictransforms:~:text=Contributing%20to%20Albumentations-,Geometric%20transforms%20(augmentations.geometric.transforms),-%C2%B6
# Folders with the source images and their label files.
IMAGE_PATH = 'dataset/object_detection/images'
LABEL_PATH = 'dataset/object_detection/labels'
# Presumably the folders the augmented images / labels are written to -- usage is outside this view, TODO confirm.
AUG_IMAGE_PATH = 'dataset/object_detection/images_aug'
AUG_LABEL_PATH = 'dataset/object_detection/labels_aug'
# Folder that show_labels() wipes, recreates and fills with annotated preview images.
SHOW_SAVE_PATH = 'results'
# Class names; show_labels() indexes this list with the integer class id of each label row.
CLASSES = ['head', 'person']
# Presumably the number of augmentation passes per image -- usage is outside this view, TODO confirm.
ENHANCEMENT_LOOP = 1
# Augmentation pipeline made of two nested Compose stages, both with p=1.0:
#   1. geometric transforms (affine, crop, flips, distortions, perspective)
#   2. pixel-level transforms (noise, compression, brightness, weather, grayscale)
# Every transform inside a stage carries its own probability p.
# Bounding boxes are declared in 'yolo' format with their classes passed via
# the 'class_labels' field; min_visibility=0.1 is forwarded to A.BboxParams.
# NOTE(review): A.Flip and the quality_lower/quality_upper arguments look
# deprecated in recent Albumentations releases -- confirm against the installed version.
ENHANCEMENT_STRATEGY = A.Compose([
    A.Compose([
        A.Affine(scale=[0.5, 1.5], translate_percent=[0.0, 0.3], rotate=[-360, 360], shear=[-45, 45], keep_ratio=True, p=0.5), # Augmentation to apply affine transformations to images.
        A.BBoxSafeRandomCrop(erosion_rate=0.2, p=0.1), # Crop a random part of the input without loss of bboxes.
        A.D4(p=0.1), # Applies one of the eight possible D4 dihedral group transformations to a square-shaped input, maintaining the square shape. These transformations correspond to the symmetries of a square, including rotations and reflections.
        A.ElasticTransform(p=0.1), # Elastic deformation of images as described in [Simard2003]_ (with modifications).
        A.Flip(p=0.1), # Flip the input either horizontally, vertically or both horizontally and vertically.
        A.GridDistortion(p=0.1), # Applies grid distortion augmentation to images, masks, and bounding boxes. This technique involves dividing the image into a grid of cells and randomly displacing the intersection points of the grid, resulting in localized distortions.
        A.Perspective(p=0.1), # Perform a random four point perspective transform of the input.
    ], p=1.0),
    A.Compose([
        A.GaussNoise(p=0.1), # Apply Gaussian noise to the input image.
        A.ISONoise(p=0.1), # Apply camera sensor noise.
        A.ImageCompression(quality_lower=50, quality_upper=100, p=0.1), # Decreases image quality by Jpeg, WebP compression of an image.
        A.RandomBrightnessContrast(p=0.1), # Randomly change brightness and contrast of the input image.
        A.RandomFog(p=0.1), # Simulates fog for the image.
        A.RandomRain(p=0.1), # Adds rain effects to an image.
        A.RandomSnow(p=0.1), # Bleach out some pixel values imitating snow.
        A.RandomShadow(p=0.1), # Simulates shadows for the image
        A.RandomSunFlare(p=0.1), # Simulates Sun Flare for the image
        A.ToGray(p=0.1), # Convert the input RGB image to grayscale
    ], p=1.0)
    # Alternative pixel-level stage, kept disabled: apply exactly one of the transforms below.
    # A.OneOf([
    #     A.GaussNoise(p=1.0), # Apply Gaussian noise to the input image.
    #     A.ISONoise(p=1.0), # Apply camera sensor noise.
    #     A.ImageCompression(quality_lower=50, quality_upper=100, p=1.0), # Decreases image quality by Jpeg, WebP compression of an image.
    #     A.RandomBrightnessContrast(p=1.0), # Randomly change brightness and contrast of the input image.
    #     A.RandomFog(p=1.0), # Simulates fog for the image.
    #     A.RandomRain(p=1.0), # Adds rain effects to an image.
    #     A.RandomSnow(p=1.0), # Bleach out some pixel values imitating snow.
    #     A.RandomShadow(p=1.0), # Simulates shadows for the image
    #     A.RandomSunFlare(p=1.0), # Simulates Sun Flare for the image
    #     A.ToGray(p=1.0), # Convert the input RGB image to grayscale
    # ], p=1.0),
], bbox_params=A.BboxParams(format='yolo', min_visibility=0.1, label_fields=['class_labels']))
def parallelise(function: Callable, data: List, chunksize=100, verbose=True, num_workers=os.cpu_count()) -> List:
    """Apply ``function`` to every item of ``data`` using a process pool.

    Args:
        function: Callable applied to each item of ``data`` in worker processes.
        data: Items to process; ``len(data)`` is used as the progress-bar total.
        chunksize: Chunk size forwarded to ``Pool.imap``.
        verbose: When False the tqdm progress bar is disabled.
        num_workers: Number of worker processes. ``None`` falls back to
            ``os.cpu_count()`` (or 1 if that is unknown); values below 1 are
            raised to 1 because a pool needs at least one worker.

    Returns:
        A list with one result per item, as yielded by ``Pool.imap``.
    """
    if num_workers is None:
        # os.cpu_count() returns None when the CPU count cannot be determined;
        # comparing None with an int would raise TypeError.
        num_workers = os.cpu_count() or 1
    num_workers = max(1, num_workers)

    pool = Pool(processes=num_workers)
    try:
        results = list(
            tqdm.tqdm(pool.imap(function, data, chunksize), total=len(data), disable=not verbose)
        )
    except BaseException:
        # Do not leave worker processes behind when a task (or Ctrl+C) aborts the run.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    return results
def draw_detections(box, name, img):
    """Draw one labelled box on ``img`` and return ``img``.

    Args:
        box: Iterable of four values (xmin, ymin, xmax, ymax); each is cast to int.
        name: Label drawn above the box (converted with ``str``).
        img: Image array whose ``shape`` unpacks to (height, width, channels).

    Returns:
        The same ``img`` object that was passed in, after drawing on it.
    """
    img_h, img_w, _ = img.shape
    left, top, right, bottom = (int(value) for value in box)

    # Line width, font size and label offset all scale with the shorter image side.
    short_side = min(img_h, img_w)
    box_thickness = max(1, int(short_side / 200))
    text_scale = short_side / 500
    text_thickness = max(1, int(short_side / 200))
    label_shift = int(short_side / 50)

    box_color = (0, 0, 255)
    text_color = (0, 255, 0)
    label_origin = (left, top - label_shift)

    cv2.rectangle(img, (left, top), (right, bottom), box_color, box_thickness)
    cv2.putText(img, str(name), label_origin, cv2.FONT_HERSHEY_SIMPLEX, text_scale, text_color, text_thickness, lineType=cv2.LINE_AA)
    return img
def show_labels(images_base_path, labels_base_path):
    """Render label boxes onto every image and save the results to ``SHOW_SAVE_PATH``.

    ``SHOW_SAVE_PATH`` is deleted and recreated first. For each file in
    ``images_base_path`` the label file ``<stem>.txt`` is looked up in
    ``labels_base_path``; every non-blank line is parsed as five floats
    ``cls x_center y_center w h`` whose coordinates are scaled by the image
    width/height before drawing.

    Args:
        images_base_path: Directory containing the images.
        labels_base_path: Directory containing the matching ``.txt`` label files.
    """
    if os.path.exists(SHOW_SAVE_PATH):
        shutil.rmtree(SHOW_SAVE_PATH)
    os.makedirs(SHOW_SAVE_PATH, exist_ok=True)

    for images_name in tqdm.tqdm(os.listdir(images_base_path)):
        file_heads, _ = os.path.splitext(images_name)
        images_path = os.path.join(images_base_path, images_name)
        labels_path = os.path.join(labels_base_path, f'{file_heads}.txt')
        if not os.path.exists(labels_path):
            print(f'{labels_path} label file not found...')
            continue

        with open(labels_path) as f:
            # Skip blank lines (e.g. a trailing newline) so they do not break parsing.
            labels = [np.array(line.split(), dtype=np.float64) for line in f if line.strip()]

        images = cv2.imread(images_path)
        if images is None:
            # cv2.imread returns None instead of raising when the file is not a readable image.
            print(f'{images_path} could not be read as an image, skipped...')
            continue

        height, width, _ = images.shape
        for cls, x_center, y_center, w, h in labels:
            x_center *= width
            y_center *= height
            w *= width
            h *= height
            # True division: floor-dividing the float box size shrank every box by up to a pixel per side.
            half_w, half_h = w / 2, h / 2
            draw_detections([x_center - half_w, y_center - half_h, x_center + half_w, y_center + half_h], CLASSES[int(cls)], images)

        cv2.imwrite(os.path.join(SHOW_SAVE_PATH, images_name), images)
        print(f'{SHOW_SAVE_PATH}/{images_name} save success...')
def data_aug_single(images_name):
    """Create ``ENHANCEMENT_LOOP`` augmented copies of one image and its label file.

    The image is read from ``IMAGE_PATH`` and its labels from
    ``LABEL_PATH/<stem>.txt`` (five floats per line: ``cls x_center y_center w h``).
    Each successful augmentation writes ``<stem>_<iii><ext>`` to ``AUG_IMAGE_PATH``
    and ``<stem>_<iii>.txt`` to ``AUG_LABEL_PATH``.

    Args:
        images_name: File name (not path) of the image inside ``IMAGE_PATH``.
    """
    file_heads, postfix = os.path.splitext(images_name)
    images_path = os.path.join(IMAGE_PATH, images_name)
    labels_path = os.path.join(LABEL_PATH, f'{file_heads}.txt')
    if not os.path.exists(labels_path):
        print(f'{labels_path} label file not found...')
        return

    with open(labels_path) as f:
        # Ignore blank lines so a trailing newline does not produce a ragged array.
        rows = [line.split() for line in f if line.strip()]
    labels = np.array(rows, dtype=np.float64)
    if labels.ndim != 2 or labels.shape[1] != 5:
        # Empty or unexpectedly shaped label files cannot be sliced into class/bbox columns.
        print(f'{labels_path} has no usable "cls x y w h" rows, skipped...')
        return

    # Force 3-channel RGB so the RGB->BGR conversion below also works for
    # grayscale/palette inputs, and close the file handle promptly.
    with Image.open(images_path) as pil_image:
        image = np.array(pil_image.convert('RGB'))

    for i in range(ENHANCEMENT_LOOP):
        new_images_name = os.path.join(AUG_IMAGE_PATH, f'{file_heads}_{i:0>3}{postfix}')
        new_labels_name = os.path.join(AUG_LABEL_PATH, f'{file_heads}_{i:0>3}.txt')
        try:
            transformed = ENHANCEMENT_STRATEGY(
                image=image.copy(),  # fresh array per pass, as in the original per-iteration conversion
                bboxes=np.clip(labels[:, 1:], 0, 1),  # clamp box values into [0, 1]
                class_labels=labels[:, 0],
            )
        except Exception as e:
            # Still best-effort, but report the failure instead of hiding it
            # (and no longer swallow KeyboardInterrupt/SystemExit).
            print(f'{images_path} augmentation #{i} failed: {e}')
            continue

        transformed_image = transformed['image']
        transformed_bboxes = transformed['bboxes']
        transformed_class_labels = transformed['class_labels']

        cv2.imwrite(new_images_name, cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR))
        with open(new_labels_name, 'w') as f:
            for bbox, cls in zip(transformed_bboxes, transformed_class_labels):
                # Class ids were parsed as floats; write them back as integers ("0", not "0.0").
                f.write(f'{int(cls)} {bbox[0]} {bbox[1]} {bbox[2]} {bbox[3]}\n')
        print(f'{new_images_name} and {new_labels_name} save success...')
def data_aug():
    """Recreate the augmented-output folders and augment every file in ``IMAGE_PATH``.

    Both ``AUG_IMAGE_PATH`` and ``AUG_LABEL_PATH`` are removed if present and
    created again, then ``data_aug_single`` runs once per directory entry.
    """
    output_dirs = (AUG_IMAGE_PATH, AUG_LABEL_PATH)

    # Drop any previous results first ...
    for stale_dir in output_dirs:
        if os.path.exists(stale_dir):
            shutil.rmtree(stale_dir)
    # ... then start from empty folders.
    for fresh_dir in output_dirs:
        os.makedirs(fresh_dir, exist_ok=True)

    source_names = os.listdir(IMAGE_PATH)
    for images_name in tqdm.tqdm(source_names):
        data_aug_single(images_name)
if __name__ == '__main__':
    # Choose the step to run by (un)commenting the calls below.
    # data_aug()  # write augmented images/labels to AUG_IMAGE_PATH / AUG_LABEL_PATH
    # show_labels(IMAGE_PATH, LABEL_PATH)  # visualise the labels of the original dataset
    show_labels(AUG_IMAGE_PATH, AUG_LABEL_PATH)  # visualise the labels of the augmented dataset
================================================
FILE: data-offline-aug/readme.md
================================================
# data-offline-aug
### 环境
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple albumentations
### 1. object_detection_data_aug.py
目标检测数据集yolo格式离线数据增强脚本.
视频教程链接:https://www.bilibili.com/video/BV1bT421k7iq/
### 2. segment_data_aug.py
语义分割离线数据增强脚本.
视频教程链接:https://www.bilibili.com/video/BV1xi421a7Gb/
# Reference
https://github.com/albumentations-team/albumentations
================================================
FILE: data-offline-aug/segment_data_aug.py
================================================
import warnings
warnings.filterwarnings('ignore')
import os, shutil, cv2, tqdm
import numpy as np
np.random.seed(0)
import albumentations as A
from PIL import Image
from multiprocessing import Pool
from typing import Callable, Dict, List, Union
# https://github.com/albumentations-team/albumentations
def generate_color_map(num_classes):
    """Build a colour lookup table with ``num_classes + 1`` entries.

    Entry 0 is black. Entry ``i + 1`` is the fully saturated, full-value HSV
    colour with hue ``i * 180 // num_classes``, converted with
    ``cv2.COLOR_HSV2BGR`` (so the channel order is BGR).

    Args:
        num_classes: Number of coloured entries to generate.

    Returns:
        ``np.uint8`` array holding one colour triple per entry.
    """
    palette = [[0, 0, 0]]
    for class_idx in range(num_classes):
        hue = class_idx * 180 // num_classes
        # cvtColor works on images, so wrap the colour as a 1x1 pixel and unwrap the result.
        hsv_pixel = np.uint8([[(hue, 255, 255)]])
        palette.append(cv2.cvtColor(hsv_pixel, cv2.COLOR_HSV2BGR)[0][0])
    return np.array(palette, dtype=np.uint8)
# Input folders: source images and their segmentation masks.
IMAGE_PATH = 'dataset/segment/images'
LABEL_PATH = 'dataset/segment/labels'
# Output folders written by data_aug(); both are deleted and recreated on each run.
AUG_IMAGE_PATH = 'dataset/segment/images_aug'
AUG_LABEL_PATH = 'dataset/segment/labels_aug'
# Folder for the overlays written by show_labels(); deleted and recreated on each call.
SHOW_SAVE_PATH = 'results'
# Colour table indexed by mask value: index 0 is black, followed by 20 generated colours.
COLORS = generate_color_map(20)
# Number of augmented samples generated per input image.
ENHANCEMENT_LOOP = 1
# Augmentation pipeline for segmentation data: an outer A.Compose holding two
# nested A.Compose groups - geometric transforms first, then pixel-level
# (noise / compression / weather / colour) transforms. Every transform carries
# its own probability `p`.
# NOTE(review): data_aug_single() calls this pipeline with `image` and `masks`
# only (no bboxes) - confirm A.BBoxSafeRandomCrop behaves as intended in that case.
ENHANCEMENT_STRATEGY = A.Compose([
    A.Compose([
        A.Affine(scale=[0.5, 1.5], translate_percent=[0.0, 0.3], rotate=[-360, 360], shear=[-45, 45], keep_ratio=True, cval_mask=0, p=0.5), # Augmentation to apply affine transformations to images.
        A.BBoxSafeRandomCrop(erosion_rate=0.2, p=0.1), # Crop a random part of the input without loss of bboxes.
        A.D4(p=0.1), # Applies one of the eight possible D4 dihedral group transformations (rotations and reflections of a square) to a square-shaped input.
        A.ElasticTransform(p=0.1), # Elastic deformation of images as described in [Simard2003]_ (with modifications).
        A.Flip(p=0.1), # Flip the input either horizontally, vertically or both horizontally and vertically.
        A.GridDistortion(p=0.1), # Divides the image into a grid of cells and randomly displaces the grid intersection points, producing localized distortions.
        A.Perspective(p=0.1), # Perform a random four point perspective transform of the input.
    ], p=1.0),
    A.Compose([
        A.GaussNoise(p=0.1), # Apply Gaussian noise to the input image.
        A.ISONoise(p=0.1), # Apply camera sensor noise.
        A.ImageCompression(quality_lower=50, quality_upper=100, p=0.1), # Decreases image quality by Jpeg, WebP compression of an image.
        A.RandomBrightnessContrast(p=0.1), # Randomly change brightness and contrast of the input image.
        A.RandomFog(p=0.1), # Simulates fog for the image.
        A.RandomRain(p=0.1), # Adds rain effects to an image.
        A.RandomSnow(p=0.1), # Bleach out some pixel values imitating snow.
        A.RandomShadow(p=0.1), # Simulates shadows for the image
        A.RandomSunFlare(p=0.1), # Simulates Sun Flare for the image
        A.ToGray(p=0.1), # Convert the input RGB image to grayscale
    ], p=1.0)
    # Alternative for the pixel-level group: apply exactly one of the transforms via A.OneOf.
    # A.OneOf([
    #     A.GaussNoise(p=1.0), # Apply Gaussian noise to the input image.
    #     A.ISONoise(p=1.0), # Apply camera sensor noise.
    #     A.ImageCompression(quality_lower=50, quality_upper=100, p=1.0), # Decreases image quality by Jpeg, WebP compression of an image.
    #     A.RandomBrightnessContrast(p=1.0), # Randomly change brightness and contrast of the input image.
    #     A.RandomFog(p=1.0), # Simulates fog for the image.
    #     A.RandomRain(p=1.0), # Adds rain effects to an image.
    #     A.RandomSnow(p=1.0), # Bleach out some pixel values imitating snow.
    #     A.RandomShadow(p=1.0), # Simulates shadows for the image
    #     A.RandomSunFlare(p=1.0), # Simulates Sun Flare for the image
    #     A.ToGray(p=1.0), # Convert the input RGB image to grayscale
    # ], p=1.0),
], is_check_shapes=False)
def draw_segments(image, mask):
    """Blend ``image`` with the colours looked up from ``mask``.

    Args:
        image: Image array to overlay onto.
        mask: Integer array used to index ``COLORS``.

    Returns:
        ``0.7 * image + 0.3 * COLORS[mask]`` as computed by ``cv2.addWeighted``.
    """
    class_colors = COLORS[mask]
    return cv2.addWeighted(image, 0.7, class_colors, 0.3, 0)
def show_labels(images_base_path, labels_base_path):
    """Overlay each segmentation mask on its image and save it to ``SHOW_SAVE_PATH``.

    ``SHOW_SAVE_PATH`` is deleted and recreated first. For each file in
    ``images_base_path`` the mask ``<stem>.png`` is looked up in
    ``labels_base_path``; the unique mask values are printed and the blended
    overlay is written under the image's original file name.

    Args:
        images_base_path: Directory containing the images.
        labels_base_path: Directory containing the matching ``.png`` masks.
    """
    if os.path.exists(SHOW_SAVE_PATH):
        shutil.rmtree(SHOW_SAVE_PATH)
    os.makedirs(SHOW_SAVE_PATH, exist_ok=True)

    for images_name in tqdm.tqdm(os.listdir(images_base_path)):
        file_heads, _ = os.path.splitext(images_name)
        images_path = os.path.join(images_base_path, images_name)
        labels_path = os.path.join(labels_base_path, f'{file_heads}.png')
        if not os.path.exists(labels_path):
            print(f'{labels_path} label file not found...')
            continue

        images = cv2.imread(images_path)
        if images is None:
            # cv2.imread returns None instead of raising when the file is not a readable image.
            print(f'{images_path} could not be read as an image, skipped...')
            continue

        masks = np.array(Image.open(labels_path))
        print(np.unique(masks))  # class ids present in this mask
        images = draw_segments(images, masks)
        cv2.imwrite(f'{SHOW_SAVE_PATH}/{images_name}', images)
        print(f'{SHOW_SAVE_PATH}/{images_name} save success...')
def data_aug_single(images_name):
    """Create ``ENHANCEMENT_LOOP`` augmented copies of one image and its mask.

    The image is read from ``IMAGE_PATH``. Its mask is looked up in
    ``LABEL_PATH`` as ``<stem>.jpg`` first (the original behaviour) and then as
    ``<stem>.png``, the extension ``show_labels`` reads from the same folder.
    Each successful augmentation writes ``<stem>_<iii><ext>`` to
    ``AUG_IMAGE_PATH`` and ``<stem>_<iii>.png`` to ``AUG_LABEL_PATH``.

    Args:
        images_name: File name (not path) of the image inside ``IMAGE_PATH``.
    """
    file_heads, postfix = os.path.splitext(images_name)
    images_path = os.path.join(IMAGE_PATH, images_name)
    candidates = [os.path.join(LABEL_PATH, f'{file_heads}{ext}') for ext in ('.jpg', '.png')]
    labels_path = next((path for path in candidates if os.path.exists(path)), None)
    if labels_path is None:
        print(f'{candidates[0]} label file not found...')
        return

    # Force 3-channel RGB so the RGB->BGR conversion below also works for
    # grayscale/palette inputs, and close both file handles promptly.
    with Image.open(images_path) as pil_image:
        image = np.array(pil_image.convert('RGB'))
    with Image.open(labels_path) as pil_mask:
        masks = np.array(pil_mask)

    for i in range(ENHANCEMENT_LOOP):
        new_images_name = os.path.join(AUG_IMAGE_PATH, f'{file_heads}_{i:0>3}{postfix}')
        new_labels_name = os.path.join(AUG_LABEL_PATH, f'{file_heads}_{i:0>3}.png')
        try:
            # Fresh copies per pass so one augmentation can never affect the next.
            transformed = ENHANCEMENT_STRATEGY(image=image.copy(), masks=[masks.copy()])
        except Exception as e:
            # Still best-effort, but report the failure instead of hiding it
            # (and no longer swallow KeyboardInterrupt/SystemExit).
            print(f'{images_path} augmentation #{i} failed: {e}')
            continue

        transformed_image = transformed['image']
        transformed_masks = transformed['masks'][0]

        cv2.imwrite(new_images_name, cv2.cvtColor(transformed_image, cv2.COLOR_RGB2BGR))
        Image.fromarray(np.array(transformed_masks)).save(new_labels_name)
        print(f'{new_images_name} and {new_labels_name} save success...')
def data_aug():
    """Recreate the augmented-output folders and augment every file in ``IMAGE_PATH``.

    Both ``AUG_IMAGE_PATH`` and ``AUG_LABEL_PATH`` are removed if present and
    created again, then ``data_aug_single`` runs once per directory entry.
    """
    output_dirs = (AUG_IMAGE_PATH, AUG_LABEL_PATH)

    # Drop any previous results first ...
    for stale_dir in output_dirs:
        if os.path.exists(stale_dir):
            shutil.rmtree(stale_dir)
    # ... then start from empty folders.
    for fresh_dir in output_dirs:
        os.makedirs(fresh_dir, exist_ok=True)

    source_names = os.listdir(IMAGE_PATH)
    for images_name in tqdm.tqdm(source_names):
        data_aug_single(images_name)
if __name__ == '__main__':
    # Choose the step to run by (un)commenting the calls below.
    show_labels(IMAGE_PATH, LABEL_PATH)  # visualise the masks of the original dataset
    # show_labels(AUG_IMAGE_PATH, AUG_LABEL_PATH)  # visualise the masks of the augmented dataset
    # data_aug()  # write augmented images/masks to AUG_IMAGE_PATH / AUG_LABEL_PATH
================================================
FILE: mmdet-course/config/atss_r50_fpn_dyhead_1x_visdrone.py
================================================
# Overrides on top of the COCO base config for training on the 10-class VisDrone dataset.
_base_ = 'atss_r50_fpn_dyhead_1x_coco.py'
# Set the head's num_classes to the number of classes listed in `metainfo` below.
model = dict(
    bbox_head=dict(
        num_classes=10
    )
)
# Dataset settings.
# NOTE: data_root is an absolute path on the author's machine - change it to your dataset location.
data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/'
metainfo = {
    'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'),
    # 'palette': [
    #     (220, 20, 60),
    # ]
}
train_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-train/annotations/train.json',
        data_prefix=dict(img='VisDrone2019-DET-train/images/')))
val_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-val/annotations/val.json',
        data_prefix=dict(img='VisDrone2019-DET-val/images/')))
test_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-test-dev/annotations/test.json',
        data_prefix=dict(img='VisDrone2019-DET-test-dev/images/')))
# Evaluation settings: the evaluators take the full annotation path (data_root + relative path).
val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json')
test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json')
# optim_wrapper = dict(type='AmpOptimWrapper')
# Log every 200 iterations.
default_hooks = dict(logger=dict(type='LoggerHook', interval=200))
# Checkpoint file to initialise from; the path is relative, so it must be reachable from the working directory.
load_from='atss_r50_fpn_dyhead_4x4_1x_coco_20211219_023314-eaa620c6.pth'
# Example commands.
# NOTE(review): the two test commands point at work_dirs/tood_r50_fpn_1x_visdrone, which looks
# copy-pasted from the TOOD config - use the checkpoint produced by this config's own run.
# nohup python tools/train.py configs/dyhead/atss_r50_fpn_dyhead_1x_visdrone.py > atss-dyhead-visdrone.log 2>&1 & tail -f atss-dyhead-visdrone.log
# python tools/test.py configs/dyhead/atss_r50_fpn_dyhead_1x_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save
# python tools/test.py configs/dyhead/atss_r50_fpn_dyhead_1x_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --tta
================================================
FILE: mmdet-course/config/cascade-rcnn_r50_fpn_1x_visdrone.py
================================================
# Overrides on top of the COCO base config for training on the 10-class VisDrone dataset.
_base_ = './cascade-rcnn_r50_fpn_1x_coco.py'
# num_classes in every bbox head must also be changed to match the number of dataset classes.
model = dict(
    roi_head=dict(
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                num_classes=10
            ),
            dict(
                type='Shared2FCBBoxHead',
                num_classes=10
            ),
            dict(
                type='Shared2FCBBoxHead',
                num_classes=10
            ),
        ]
    )
)
# Dataset settings.
# NOTE: data_root is an absolute path on the author's machine - change it to your dataset location.
data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/'
metainfo = {
    'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'),
    # 'palette': [
    #     (220, 20, 60),
    # ]
}
train_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-train/annotations/train.json',
        data_prefix=dict(img='VisDrone2019-DET-train/images/')))
val_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-val/annotations/val.json',
        data_prefix=dict(img='VisDrone2019-DET-val/images/')))
test_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-test-dev/annotations/test.json',
        data_prefix=dict(img='VisDrone2019-DET-test-dev/images/')))
# Evaluation settings: the evaluators take the full annotation path (data_root + relative path).
val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json')
test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json')
# optim_wrapper = dict(type='AmpOptimWrapper')
# Log every 200 iterations.
default_hooks = dict(logger=dict(type='LoggerHook', interval=200))
# Checkpoint file to initialise from; the path is relative, so it must be reachable from the working directory.
load_from='cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth'
# Example commands (train in the background with a log file, test with visualisation, test with --tta).
# nohup python tools/train.py configs/cascade_rcnn/cascade-rcnn_r50_fpn_1x_visdrone.py > cascade-rcnn-visdrone.log 2>&1 & tail -f cascade-rcnn-visdrone.log
# python tools/test.py configs/cascade_rcnn/cascade-rcnn_r50_fpn_1x_visdrone.py work_dirs/cascade-rcnn_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save
# python tools/test.py configs/cascade_rcnn/cascade-rcnn_r50_fpn_1x_visdrone.py work_dirs/cascade-rcnn_r50_fpn_1x_visdrone/epoch_12.pth --tta
================================================
FILE: mmdet-course/config/ddq-detr-4scale_r50_8xb2-12e_visdrone.py
================================================
# Overrides on top of the COCO base config for training on the 10-class VisDrone dataset.
_base_ = 'ddq-detr-4scale_r50_8xb2-12e_coco.py'
# Set the head's num_classes to the number of classes listed in `metainfo` below.
model = dict(
    bbox_head=dict(
        type='DDQDETRHead',
        num_classes=10
    )
)
# Dataset settings.
# NOTE: data_root is an absolute path on the author's machine - change it to your dataset location.
data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/'
metainfo = {
    'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'),
    # 'palette': [
    #     (220, 20, 60),
    # ]
}
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-train/annotations/train.json',
        data_prefix=dict(img='VisDrone2019-DET-train/images/')))
val_dataloader = dict(
    batch_size=2,
    num_workers=2,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-val/annotations/val.json',
        data_prefix=dict(img='VisDrone2019-DET-val/images/')))
test_dataloader = dict(
    batch_size=2,
    num_workers=2,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-test-dev/annotations/test.json',
        data_prefix=dict(img='VisDrone2019-DET-test-dev/images/')))
# Evaluation settings: the evaluators take the full annotation path (data_root + relative path).
val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json')
test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json')
# optim_wrapper = dict(type='AmpOptimWrapper')
# Log every 1000 iterations.
default_hooks = dict(logger=dict(type='LoggerHook', interval=1000))
# Checkpoint file to initialise from; the path is relative, so it must be reachable from the working directory.
load_from='ddq-detr-4scale_r50_8xb2-12e_coco_20230809_170711-42528127.pth'
# Example commands.
# NOTE(review): the two test commands point at work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone, which
# looks copy-pasted from the Faster R-CNN config - use the checkpoint produced by this config's own run.
# nohup python tools/train.py configs/ddq/ddq-detr-4scale_r50_8xb2-12e_visdrone.py > ddq-visdrone.log 2>&1 & tail -f ddq-visdrone.log
# python tools/test.py configs/ddq/ddq-detr-4scale_r50_8xb2-12e_visdrone.py work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone/epoch_12.pth --show --show-dir test_save
# python tools/test.py configs/ddq/ddq-detr-4scale_r50_8xb2-12e_visdrone.py work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone/epoch_12.pth --tta
================================================
FILE: mmdet-course/config/dino-4scale_r50_8xb2-12e_visdrone.py
================================================
# Overrides on top of the COCO base config for training on the 10-class VisDrone dataset.
_base_ = 'dino-4scale_r50_8xb2-12e_coco.py'
# Set the head's num_classes to the number of classes listed in `metainfo` below.
model = dict(
    bbox_head=dict(
        type='DINOHead',
        num_classes=10,
    )
)
# Dataset settings.
# NOTE: data_root is an absolute path on the author's machine - change it to your dataset location.
data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/'
metainfo = {
    'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'),
    # 'palette': [
    #     (220, 20, 60),
    # ]
}
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-train/annotations/train.json',
        data_prefix=dict(img='VisDrone2019-DET-train/images/')))
val_dataloader = dict(
    batch_size=4,
    num_workers=4,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-val/annotations/val.json',
        data_prefix=dict(img='VisDrone2019-DET-val/images/')))
test_dataloader = dict(
    batch_size=4,
    num_workers=4,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-test-dev/annotations/test.json',
        data_prefix=dict(img='VisDrone2019-DET-test-dev/images/')))
# Evaluation settings: the evaluators take the full annotation path (data_root + relative path).
val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json')
test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json')
# optim_wrapper = dict(type='AmpOptimWrapper')
# Log every 500 iterations.
default_hooks = dict(logger=dict(type='LoggerHook', interval=500))
# Checkpoint file to initialise from; the path is relative, so it must be reachable from the working directory.
load_from='dino-4scale_r50_8xb2-12e_coco_20221202_182705-55b2bba2.pth'
# Example commands.
# NOTE(review): the two test commands point at work_dirs/tood_r50_fpn_1x_visdrone, which looks
# copy-pasted from the TOOD config - use the checkpoint produced by this config's own run.
# nohup python tools/train.py configs/dino/dino-4scale_r50_8xb2-12e_visdrone.py > dino-visdrone.log 2>&1 & tail -f dino-visdrone.log
# python tools/test.py configs/dino/dino-4scale_r50_8xb2-12e_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save
# python tools/test.py configs/dino/dino-4scale_r50_8xb2-12e_visdrone.py work_dirs/tood_r50_fpn_1x_visdrone/epoch_12.pth --tta
================================================
FILE: mmdet-course/config/faster-rcnn_r50_fpn_ciou_1x_visdrone.py
================================================
# Overrides on top of the COCO base config for training on the 10-class VisDrone dataset.
_base_ = 'faster-rcnn_r50_fpn_ciou_1x_coco.py'
# num_classes in the head must also be changed to match the number of dataset classes.
model = dict(
    roi_head=dict(
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            num_classes=10
        )
    )
)
# Dataset settings.
# NOTE: data_root is an absolute path on the author's machine - change it to your dataset location.
data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/'
metainfo = {
    'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'),
    # 'palette': [
    #     (220, 20, 60),
    # ]
}
train_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-train/annotations/train.json',
        data_prefix=dict(img='VisDrone2019-DET-train/images/')))
val_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-val/annotations/val.json',
        data_prefix=dict(img='VisDrone2019-DET-val/images/')))
test_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-test-dev/annotations/test.json',
        data_prefix=dict(img='VisDrone2019-DET-test-dev/images/')))
# Evaluation settings: the evaluators take the full annotation path (data_root + relative path).
val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json')
test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json')
# optim_wrapper = dict(type='AmpOptimWrapper')
# Log every 200 iterations.
default_hooks = dict(logger=dict(type='LoggerHook', interval=200))
# Checkpoint file to initialise from; the path is relative, so it must be reachable from the working directory.
# NOTE(review): the file name says "giou" while this config and its base are "ciou" - presumably
# intentional (initialising from the GIoU-trained weights); confirm.
load_from='faster_rcnn_r50_fpn_giou_1x_coco-0eada910.pth'
# Example commands (train in the background with a log file, test with visualisation, test with --tta).
# nohup python tools/train.py configs/faster_rcnn/faster-rcnn_r50_fpn_ciou_1x_visdrone.py > faster-rcnn-visdrone.log 2>&1 & tail -f faster-rcnn-visdrone.log
# python tools/test.py configs/faster_rcnn/faster-rcnn_r50_fpn_ciou_1x_visdrone.py work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone/epoch_12.pth --show --show-dir test_save
# python tools/test.py configs/faster_rcnn/faster-rcnn_r50_fpn_ciou_1x_visdrone.py work_dirs/faster-rcnn_r50_fpn_ciou_1x_visdrone/epoch_12.pth --tta
================================================
FILE: mmdet-course/config/gfl_r50_fpn_1x_visdrone.py
================================================
# Overrides on top of the COCO base config for training on the 10-class VisDrone dataset.
_base_ = 'gfl_r50_fpn_1x_coco.py'
# num_classes in the head must also be changed to match the number of dataset classes.
model = dict(
    bbox_head=dict(
        num_classes=10
    )
)
# Dataset settings.
# NOTE: data_root is an absolute path on the author's machine - change it to your dataset location.
data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/'
metainfo = {
    'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'),
    # 'palette': [
    #     (220, 20, 60),
    # ]
}
train_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-train/annotations/train.json',
        data_prefix=dict(img='VisDrone2019-DET-train/images/')))
val_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-val/annotations/val.json',
        data_prefix=dict(img='VisDrone2019-DET-val/images/')))
test_dataloader = dict(
    batch_size=8,
    num_workers=8,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        ann_file='VisDrone2019-DET-test-dev/annotations/test.json',
        data_prefix=dict(img='VisDrone2019-DET-test-dev/images/')))
# Evaluation settings: the evaluators take the full annotation path (data_root + relative path).
val_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-val/annotations/val.json')
test_evaluator = dict(ann_file=data_root + 'VisDrone2019-DET-test-dev/annotations/test.json')
# optim_wrapper = dict(type='AmpOptimWrapper')
# Log every 200 iterations.
default_hooks = dict(logger=dict(type='LoggerHook', interval=200))
# Checkpoint file to initialise from; the path is relative, so it must be reachable from the working directory.
load_from='gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth'
# Example commands (train in the background with a log file, test with visualisation, test with --tta, get_flops).
# nohup python tools/train.py configs/gfl/gfl_r50_fpn_1x_visdrone.py > gfl-visdrone.log 2>&1 & tail -f gfl-visdrone.log
# python tools/test.py configs/gfl/gfl_r50_fpn_1x_visdrone.py work_dirs/gfl_r50_fpn_1x_visdrone/epoch_12.pth --show --show-dir test_save
# python tools/test.py configs/gfl/gfl_r50_fpn_1x_visdrone.py work_dirs/gfl_r50_fpn_1x_visdrone/epoch_12.pth --tta
# python tools/analysis_tools/get_flops.py configs/gfl/gfl_r50_fpn_1x_visdrone.py
================================================
FILE: mmdet-course/config/retinanet_r50_fpn_1x_visdrone.py
================================================
# Overrides on top of the COCO base config for training on the 10-class VisDrone dataset.
_base_ = 'retinanet_r50_fpn_1x_coco.py'
# num_classes in the head must also be changed to match the number of dataset classes.
model = dict(
    bbox_head=dict(
        num_classes=10
    )
)
# Dataset settings.
# NOTE: data_root is an absolute path on the author's machine - change it to your dataset location.
data_root = '/home/hjj/Desktop/dataset/dataset_visdrone/'
metainfo = {
    'classes': ('pedestrian', 'people', 'bicycle', 'car', 'van', 'truck', 'tricycle', 'awning-tricycle', 'bus', 'motor'),
    # 'palette': [
    #     (220, 20, 60),
    # ]
}
train_dataloader = dict(
batch_size=8,
num_workers=8,
dataset=dict(
data_root=data_root,
metainfo=metainfo,
gitextract_1c2iago4/
├── .gitignore
├── Ultralytics-YOLO-project.md
├── bilibili-guide.md
├── cv-attention/
│ ├── A2Attention.py
│ ├── BAM.py
│ ├── Biformer.py
│ ├── CAA.py
│ ├── CBAM.py
│ ├── CPCA.py
│ ├── CloAttention.py
│ ├── CoTAttention.py
│ ├── CoordAttention.py
│ ├── DAttention.py
│ ├── ECA.py
│ ├── ELA.py
│ ├── EMA.py
│ ├── EffectiveSE.py
│ ├── GAM.py
│ ├── GC.py
│ ├── GE.py
│ ├── LSKA.py
│ ├── LSKBlock.py
│ ├── MHSA.py
│ ├── MLCA.py
│ ├── MobileViTAttention.py
│ ├── ParNetAttention.py
│ ├── PolarizedSelfAttention.py
│ ├── S2Attention.py
│ ├── SE.py
│ ├── SGE.py
│ ├── SK.py
│ ├── SequentialSelfAttention.py
│ ├── ShuffleAttention.py
│ ├── SimAM.py
│ ├── TripletAttention.py
│ └── readme.md
├── cvpr2025-deim-project.md
├── damo-yolo/
│ ├── Annotations/
│ │ └── ReadMe.md
│ ├── JPEGImages/
│ │ └── ReadMe.md
│ ├── readme.md
│ └── voc2coco.py
├── data-offline-aug/
│ ├── object_detection_data_aug.py
│ ├── readme.md
│ └── segment_data_aug.py
├── mmdet-course/
│ ├── config/
│ │ ├── atss_r50_fpn_dyhead_1x_visdrone.py
│ │ ├── cascade-rcnn_r50_fpn_1x_visdrone.py
│ │ ├── ddq-detr-4scale_r50_8xb2-12e_visdrone.py
│ │ ├── dino-4scale_r50_8xb2-12e_visdrone.py
│ │ ├── faster-rcnn_r50_fpn_ciou_1x_visdrone.py
│ │ ├── gfl_r50_fpn_1x_visdrone.py
│ │ ├── retinanet_r50_fpn_1x_visdrone.py
│ │ ├── rtmdet_tiny_8xb32-300e_visdrone.py
│ │ ├── tood_r50_fpn_1x_visdrone.py
│ │ └── yolox_tiny_8xb8-300e_visdrone.py
│ ├── mmdet2yolo.py
│ ├── readme.md
│ └── yolo2coco.py
├── module-info/
│ ├── CVPR2023-SMPConv.md
│ ├── CVPR2024-DCMPNet.md
│ ├── CVPR2024-FADC.md
│ ├── CVPR2024-PKINet.md
│ ├── CVPR2024-ParameterNet.md
│ ├── CVPR2024-RMT.md
│ ├── CVPR2024-RepVIT.md
│ ├── CVPR2024-Rewrite the Stars.md
│ ├── CVPR2024-SFSConv.md
│ ├── CVPR2024-TransNext.md
│ ├── CVPR2024-UniRepLKNet.md
│ ├── CVPR2025-BHViT.md
│ ├── CVPR2025-DarkIR.md
│ ├── CVPR2025-EVSSM.md
│ ├── CVPR2025-EfficientViM.md
│ ├── CVPR2025-FDConv.md
│ ├── CVPR2025-GroupMamba.md
│ ├── CVPR2025-LSNet.md
│ ├── CVPR2025-MambaIRV2.md
│ ├── CVPR2025-MambaOut.md
│ ├── CVPR2025-MambaVision.md
│ ├── CVPR2025-MobileMamba.md
│ ├── CVPR2025-Mona.md
│ ├── CVPR2025-OverLoCK.md
│ ├── CVPR2025-SCSegamba.md
│ ├── CVPR2025-Transformers without Normalization.md
│ ├── CVPR2025-vHeat.md
│ ├── ICLR2025-Pola.md
│ ├── ICLR2025-ToST.md
│ └── TPAMI2025-HyperYOLO.md
├── mutilmodel-project.md
├── objectdetection-tricks/
│ ├── readme.md
│ ├── tricks_1.py
│ ├── tricks_10.py
│ ├── tricks_11.py
│ ├── tricks_12.py
│ ├── tricks_13.py
│ ├── tricks_14.py
│ ├── tricks_15.py
│ ├── tricks_16.py
│ ├── tricks_2.py
│ ├── tricks_3.py
│ ├── tricks_4.py
│ ├── tricks_5.py
│ ├── tricks_6.py
│ ├── tricks_7.py
│ ├── tricks_8.py
│ └── tricks_9.py
├── readme.md
├── visdrone2019-benchmark/
│ └── readme.md
├── yolo/
│ ├── data.yaml
│ ├── dataset/
│ │ ├── VOCdevkit/
│ │ │ ├── Annotations/
│ │ │ │ └── ReadMe.md
│ │ │ ├── JPEGImages/
│ │ │ │ └── ReadMe.md
│ │ │ └── txt/
│ │ │ └── ReadMe.md
│ │ ├── split_data.py
│ │ └── xml2txt.py
│ └── readme.md
├── yolo-gradcam/
│ ├── README.md
│ ├── yolov11_heatmap.py
│ ├── yolov5_heatmap.py
│ ├── yolov7_heatmap.py
│ ├── yolov8_heatmap.py
│ └── yolov9_heatmap.py
└── yolo-improve/
├── CAM.py
├── iou.py
├── paper.md
├── readme.md
├── rtdetr-compress.md
├── rtdetr-distill.md
├── rtdetr-project.md
├── ultralytics-yolo/
│ ├── get_COCO_metrice.py
│ ├── heatmap.py
│ ├── requirements.txt
│ ├── train.py
│ ├── val.py
│ └── yolo2coco.py
├── yolov11-project.md
├── yolov5-AIFI.py
├── yolov5-AUX/
│ ├── benchmarks.py
│ ├── data/
│ │ ├── Argoverse.yaml
│ │ ├── GlobalWheat2020.yaml
│ │ ├── ImageNet.yaml
│ │ ├── Objects365.yaml
│ │ ├── SKU-110K.yaml
│ │ ├── VOC.yaml
│ │ ├── VisDrone.yaml
│ │ ├── coco.yaml
│ │ ├── coco128-seg.yaml
│ │ ├── coco128.yaml
│ │ ├── hyps/
│ │ │ ├── hyp.Objects365.yaml
│ │ │ ├── hyp.VOC.yaml
│ │ │ ├── hyp.no-augmentation.yaml
│ │ │ ├── hyp.scratch-high.yaml
│ │ │ ├── hyp.scratch-low.yaml
│ │ │ └── hyp.scratch-med.yaml
│ │ ├── scripts/
│ │ │ ├── download_weights.sh
│ │ │ ├── get_coco.sh
│ │ │ ├── get_coco128.sh
│ │ │ └── get_imagenet.sh
│ │ └── xView.yaml
│ ├── detect.py
│ ├── export.py
│ ├── hubconf.py
│ ├── models/
│ │ ├── __init__.py
│ │ ├── common.py
│ │ ├── experimental.py
│ │ ├── hub/
│ │ │ ├── anchors.yaml
│ │ │ ├── yolov3-spp.yaml
│ │ │ ├── yolov3-tiny.yaml
│ │ │ ├── yolov3.yaml
│ │ │ ├── yolov5-bifpn.yaml
│ │ │ ├── yolov5-fpn.yaml
│ │ │ ├── yolov5-p2.yaml
│ │ │ ├── yolov5-p34.yaml
│ │ │ ├── yolov5-p6.yaml
│ │ │ ├── yolov5-p7.yaml
│ │ │ ├── yolov5-panet.yaml
│ │ │ ├── yolov5l6.yaml
│ │ │ ├── yolov5m6.yaml
│ │ │ ├── yolov5n6.yaml
│ │ │ ├── yolov5s-LeakyReLU.yaml
│ │ │ ├── yolov5s-ghost.yaml
│ │ │ ├── yolov5s-transformer.yaml
│ │ │ ├── yolov5s6.yaml
│ │ │ └── yolov5x6.yaml
│ │ ├── segment/
│ │ │ ├── yolov5l-seg.yaml
│ │ │ ├── yolov5m-seg.yaml
│ │ │ ├── yolov5n-seg.yaml
│ │ │ ├── yolov5s-seg.yaml
│ │ │ └── yolov5x-seg.yaml
│ │ ├── tf.py
│ │ ├── yolo.py
│ │ ├── yolov5_aux.yaml
│ │ ├── yolov5l.yaml
│ │ ├── yolov5m.yaml
│ │ ├── yolov5n.yaml
│ │ ├── yolov5s.yaml
│ │ └── yolov5x.yaml
│ ├── train.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── activations.py
│ │ ├── augmentations.py
│ │ ├── autoanchor.py
│ │ ├── autobatch.py
│ │ ├── aws/
│ │ │ ├── __init__.py
│ │ │ ├── mime.sh
│ │ │ ├── resume.py
│ │ │ └── userdata.sh
│ │ ├── callbacks.py
│ │ ├── dataloaders.py
│ │ ├── docker/
│ │ │ ├── Dockerfile
│ │ │ ├── Dockerfile-arm64
│ │ │ └── Dockerfile-cpu
│ │ ├── downloads.py
│ │ ├── flask_rest_api/
│ │ │ ├── README.md
│ │ │ ├── example_request.py
│ │ │ └── restapi.py
│ │ ├── general.py
│ │ ├── google_app_engine/
│ │ │ ├── Dockerfile
│ │ │ ├── additional_requirements.txt
│ │ │ └── app.yaml
│ │ ├── loggers/
│ │ │ ├── __init__.py
│ │ │ ├── clearml/
│ │ │ │ ├── README.md
│ │ │ │ ├── __init__.py
│ │ │ │ ├── clearml_utils.py
│ │ │ │ └── hpo.py
│ │ │ └── comet/
│ │ │ ├── README.md
│ │ │ ├── __init__.py
│ │ │ ├── comet_utils.py
│ │ │ ├── hpo.py
│ │ │ └── optimizer_config.json
│ │ ├── loss.py
│ │ ├── metrics.py
│ │ ├── plots.py
│ │ ├── segment/
│ │ │ ├── __init__.py
│ │ │ ├── augmentations.py
│ │ │ ├── dataloaders.py
│ │ │ ├── general.py
│ │ │ ├── loss.py
│ │ │ ├── metrics.py
│ │ │ └── plots.py
│ │ ├── torch_utils.py
│ │ └── triton.py
│ └── val.py
├── yolov5-C3RFEM.py
├── yolov5-CARAFE.py
├── yolov5-CCFM.py
├── yolov5-ContextAggregation.py
├── yolov5-CoordConv.py
├── yolov5-DBB.py
├── yolov5-DCN.py
├── yolov5-DCNV3/
│ ├── commod.py
│ └── ops_dcnv3/
│ ├── functions/
│ │ ├── __init__.py
│ │ └── dcnv3_func.py
│ ├── make.sh
│ ├── modules/
│ │ ├── __init__.py
│ │ └── dcnv3.py
│ ├── setup.py
│ ├── src/
│ │ ├── cpu/
│ │ │ ├── dcnv3_cpu.cpp
│ │ │ └── dcnv3_cpu.h
│ │ ├── cuda/
│ │ │ ├── dcnv3_cuda.cu
│ │ │ ├── dcnv3_cuda.h
│ │ │ └── dcnv3_im2col_cuda.cuh
│ │ ├── dcnv3.h
│ │ └── vision.cpp
│ └── test.py
├── yolov5-DSConv.py
├── yolov5-DecoupledHead.py
├── yolov5-DySnakeConv.py
├── yolov5-EVC.py
├── yolov5-FasterBlock.py
├── yolov5-GFPN/
│ ├── extra_modules.py
│ └── yolov5_GFPN.yaml
├── yolov5-GOLDYOLO/
│ ├── common.py
│ ├── yolo.py
│ ├── yolov5n-goldyolo.yaml
│ ├── yolov7-goldyolo.yaml
│ └── yolov7-tiny-goldyolo.yaml
├── yolov5-NWD.py
├── yolov5-OTA/
│ └── loss.py
├── yolov5-RepNCSPELAN.py
├── yolov5-SAConv.py
├── yolov5-TSCODE.py
├── yolov5-aLRPLoss.py
├── yolov5-asf.py
├── yolov5-backbone/
│ ├── CVPR2023-EfficientViT/
│ │ └── EfficientViT.py
│ ├── CVPR2024-StarNet/
│ │ └── starnet.py
│ ├── ConvNextV2/
│ │ └── convnextv2.py
│ ├── EMO/
│ │ └── emo.py
│ ├── EfficientFormerV2/
│ │ └── EfficientFormerV2.py
│ ├── EfficientViT/
│ │ └── efficientViT.py
│ ├── FocalNet/
│ │ └── FocalNet.py
│ ├── LSKNet/
│ │ └── lsknet.py
│ ├── MobileNetV4/
│ │ └── mobilenetv4.py
│ ├── NextViT/
│ │ └── NextViT.py
│ ├── ODConv/
│ │ ├── od_mobilenetv2.py
│ │ ├── od_resnet.py
│ │ └── odconv.py
│ ├── ODConvFuse/
│ │ ├── od_mobilenetv2.py
│ │ ├── od_resnet.py
│ │ └── odconv.py
│ ├── PoolFormer/
│ │ └── poolformer.py
│ ├── RIFormer/
│ │ └── RIFormer.py
│ ├── RepViT/
│ │ └── repvit.py
│ ├── SwinTransformer/
│ │ └── SwinTransformer.py
│ ├── UniRepLKNet/
│ │ └── unireplknet.py
│ ├── VanillaNet/
│ │ └── VanillaNet.py
│ ├── fasternet/
│ │ ├── faster_cfg/
│ │ │ ├── fasternet_l.yaml
│ │ │ ├── fasternet_m.yaml
│ │ │ ├── fasternet_s.yaml
│ │ │ ├── fasternet_t0.yaml
│ │ │ ├── fasternet_t1.yaml
│ │ │ └── fasternet_t2.yaml
│ │ └── fasternet.py
│ ├── inceptionnext/
│ │ └── inceptionnext.py
│ ├── main.py
│ ├── yolo.py
│ └── yolov5-custom.yaml
├── yolov5-dyhead.py
├── yolov5-res2block.py
├── yolov5-softnms.py
├── yolov5v7-light.md
├── yolov7-CoordConv.py
├── yolov7-DBB.py
├── yolov7-DCN.py
├── yolov7-DCNV3.py
├── yolov7-DSConv.py
├── yolov7-DecoupledHead.py
├── yolov7-DySnakeConv.py
├── yolov7-EVC.py
├── yolov7-MPDiou.py
├── yolov7-NWD.py
├── yolov7-PConv.py
├── yolov7-RFEM.py
├── yolov7-RepNCSPELAN.py
├── yolov7-SAConv.py
├── yolov7-asf.py
├── yolov7-head/
│ ├── yolov7-tiny-5-heads.yaml
│ ├── yolov7-tiny-P2.yaml
│ └── yolov7-tiny-P6.yaml
├── yolov7-iou.py
├── yolov7-odconv.py
├── yolov7-slimneck.py
├── yolov7-softnms.py
├── yolov8-DCN.py
├── yolov8-compress.md
├── yolov8-distill.md
├── yolov8-erf.py
├── yolov8-objectcount.py
├── yolov8-track.py
├── yolov8.py
├── yolov8v10-project.md
└── yolov9-backbone/
├── yolo.py
└── yolov9-c-custom.yaml
SYMBOL INDEX (2361 symbols across 178 files)
FILE: cv-attention/A2Attention.py
class DoubleAttention (line 9) | class DoubleAttention(nn.Module):
method __init__ (line 11) | def __init__(self, in_channels,c_m=128,c_n=128,reconstruct = True):
method init_weights (line 25) | def init_weights(self):
method forward (line 39) | def forward(self, x):
FILE: cv-attention/BAM.py
function autopad (line 6) | def autopad(k, p=None, d=1): # kernel, padding, dilation
class Flatten (line 14) | class Flatten(nn.Module):
method forward (line 15) | def forward(self, x):
class ChannelAttention (line 19) | class ChannelAttention(nn.Module):
method __init__ (line 20) | def __init__(self, channel, reduction=16, num_layers=3):
method forward (line 35) | def forward(self, x):
class SpatialAttention (line 42) | class SpatialAttention(nn.Module):
method __init__ (line 43) | def __init__(self, channel, reduction=16, num_layers=3, dia_val=2):
method forward (line 57) | def forward(self, x):
class BAMBlock (line 63) | class BAMBlock(nn.Module):
method __init__ (line 64) | def __init__(self, channel=512, reduction=16, dia_val=2):
method init_weights (line 70) | def init_weights(self):
method forward (line 84) | def forward(self, x):
FILE: cv-attention/Biformer.py
class TopkRouting (line 22) | class TopkRouting(nn.Module):
method __init__ (line 33) | def __init__(self, qk_dim, topk=4, qk_scale=None, param_routing=False,...
method forward (line 44) | def forward(self, query:Tensor, key:Tensor)->Tuple[Tensor]:
class KVGather (line 61) | class KVGather(nn.Module):
method __init__ (line 62) | def __init__(self, mul_weight='none'):
method forward (line 67) | def forward(self, r_idx:Tensor, r_weight:Tensor, kv:Tensor):
class QKVLinear (line 95) | class QKVLinear(nn.Module):
method __init__ (line 96) | def __init__(self, dim, qk_dim, bias=True):
method forward (line 102) | def forward(self, x):
class BiLevelRoutingAttention (line 108) | class BiLevelRoutingAttention(nn.Module):
method __init__ (line 118) | def __init__(self, dim, n_win=7, num_heads=8, qk_dim=None, qk_scale=None,
method forward (line 203) | def forward(self, x, ret_attn_mask=False):
class Attention (line 287) | class Attention(nn.Module):
method __init__ (line 291) | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, at...
method forward (line 303) | def forward(self, x):
class AttentionLePE (line 330) | class AttentionLePE(nn.Module):
method __init__ (line 334) | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, at...
method forward (line 348) | def forward(self, x):
function _grid2seq (line 380) | def _grid2seq(x:Tensor, region_size:Tuple[int], num_heads:int):
function _seq2grid (line 397) | def _seq2grid(x:Tensor, region_h:int, region_w:int, region_size:Tuple[in...
function regional_routing_attention_torch (line 411) | def regional_routing_attention_torch(
class BiLevelRoutingAttention_nchw (line 484) | class BiLevelRoutingAttention_nchw(nn.Module):
method __init__ (line 498) | def __init__(self, dim, num_heads=8, n_win=7, qk_scale=None, topk=4, ...
method forward (line 525) | def forward(self, x:Tensor, ret_attn_mask=False):
FILE: cv-attention/CAA.py
function autopad (line 3) | def autopad(k, p=None, d=1): # kernel, padding, dilation
class Conv (line 12) | class Conv(nn.Module):
method __init__ (line 17) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
method forward (line 24) | def forward(self, x):
method forward_fuse (line 28) | def forward_fuse(self, x):
class CAA (line 32) | class CAA(nn.Module):
method __init__ (line 33) | def __init__(self, ch, h_kernel_size = 11, v_kernel_size = 11) -> None:
method forward (line 43) | def forward(self, x):
FILE: cv-attention/CBAM.py
class ChannelAttention (line 7) | class ChannelAttention(nn.Module):
method __init__ (line 8) | def __init__(self, channel, reduction=16):
method forward (line 19) | def forward(self, x):
class SpatialAttention (line 28) | class SpatialAttention(nn.Module):
method __init__ (line 29) | def __init__(self, kernel_size=7):
method forward (line 34) | def forward(self, x):
class CBAMBlock (line 43) | class CBAMBlock(nn.Module):
method __init__ (line 45) | def __init__(self, channel=512, reduction=16, kernel_size=7):
method init_weights (line 50) | def init_weights(self):
method forward (line 64) | def forward(self, x):
FILE: cv-attention/CPCA.py
class CPCA_ChannelAttention (line 5) | class CPCA_ChannelAttention(nn.Module):
method __init__ (line 7) | def __init__(self, input_channels, internal_neurons):
method forward (line 13) | def forward(self, inputs):
class CPCA (line 28) | class CPCA(nn.Module):
method __init__ (line 29) | def __init__(self, channels, channelAttention_reduce=4):
method forward (line 43) | def forward(self, inputs):
FILE: cv-attention/CloAttention.py
class AttnMap (line 5) | class AttnMap(nn.Module):
method __init__ (line 6) | def __init__(self, dim):
method forward (line 13) | def forward(self, x):
class EfficientAttention (line 16) | class EfficientAttention(nn.Module):
method __init__ (line 17) | def __init__(self, dim, num_heads=8, group_split=[4, 4], kernel_sizes=...
method high_fre_attntion (line 56) | def high_fre_attntion(self, x: torch.Tensor, to_qkv: nn.Module, mixer:...
method low_fre_attention (line 69) | def low_fre_attention(self, x : torch.Tensor, to_q: nn.Module, to_kv: ...
method forward (line 85) | def forward(self, x: torch.Tensor):
FILE: cv-attention/CoTAttention.py
class CoTAttention (line 10) | class CoTAttention(nn.Module):
method __init__ (line 12) | def __init__(self, dim=512, kernel_size=3):
method forward (line 35) | def forward(self, x):
FILE: cv-attention/CoordAttention.py
class h_sigmoid (line 6) | class h_sigmoid(nn.Module):
method __init__ (line 7) | def __init__(self, inplace=True):
method forward (line 11) | def forward(self, x):
class h_swish (line 15) | class h_swish(nn.Module):
method __init__ (line 16) | def __init__(self, inplace=True):
method forward (line 20) | def forward(self, x):
class CoordAtt (line 24) | class CoordAtt(nn.Module):
method __init__ (line 25) | def __init__(self, inp, reduction=32):
method forward (line 39) | def forward(self, x):
FILE: cv-attention/DAttention.py
class LayerNormProxy (line 7) | class LayerNormProxy(nn.Module):
method __init__ (line 8) | def __init__(self, dim):
method forward (line 12) | def forward(self, x):
class DAttention (line 17) | class DAttention(nn.Module):
method __init__ (line 20) | def __init__(
method _get_ref_points (line 107) | def _get_ref_points(self, H_key, W_key, B, dtype, device):
method _get_q_grid (line 122) | def _get_q_grid(self, H, W, B, dtype, device):
method forward (line 136) | def forward(self, x):
FILE: cv-attention/ECA.py
class EfficientChannelAttention (line 4) | class EfficientChannelAttention(nn.Module): # Efficient Channe...
method __init__ (line 5) | def __init__(self, c, b=1, gamma=2):
method forward (line 14) | def forward(self, x):
FILE: cv-attention/ELA.py
class ELA (line 3) | class ELA(nn.Module):
method __init__ (line 4) | def __init__(self, channels) -> None:
method forward (line 14) | def forward(self, x):
FILE: cv-attention/EMA.py
class EMA (line 4) | class EMA(nn.Module):
method __init__ (line 5) | def __init__(self, channels, factor=8):
method forward (line 17) | def forward(self, x):
FILE: cv-attention/EffectiveSE.py
class EffectiveSEModule (line 6) | class EffectiveSEModule(nn.Module):
method __init__ (line 7) | def __init__(self, channels, add_maxpool=False, gate_layer='hard_sigmo...
method forward (line 13) | def forward(self, x):
FILE: cv-attention/GAM.py
class GAM_Attention (line 4) | class GAM_Attention(nn.Module):
method __init__ (line 5) | def __init__(self, in_channels, rate=4):
method forward (line 22) | def forward(self, x):
FILE: cv-attention/GC.py
class GlobalContext (line 10) | class GlobalContext(nn.Module):
method __init__ (line 12) | def __init__(self, channels, use_attn=True, fuse_add=False, fuse_scale...
method reset_parameters (line 34) | def reset_parameters(self):
method forward (line 40) | def forward(self, x):
FILE: cv-attention/GE.py
class GatherExcite (line 11) | class GatherExcite(nn.Module):
method __init__ (line 12) | def __init__(
method forward (line 54) | def forward(self, x):
FILE: cv-attention/LSKA.py
class LSKA (line 3) | class LSKA(nn.Module):
method __init__ (line 6) | def __init__(self, dim, k_size=7):
method forward (line 44) | def forward(self, x):
FILE: cv-attention/LSKBlock.py
class LSKblock (line 4) | class LSKblock(nn.Module):
method __init__ (line 5) | def __init__(self, dim):
method forward (line 14) | def forward(self, x):
FILE: cv-attention/MHSA.py
class MHSA (line 4) | class MHSA(nn.Module):
method __init__ (line 5) | def __init__(self, n_dims, width=14, height=14, heads=4, pos_emb=False):
method forward (line 20) | def forward(self, x):
FILE: cv-attention/MLCA.py
class MLCA (line 5) | class MLCA(nn.Module):
method __init__ (line 6) | def __init__(self, in_size, local_size=5, gamma = 2, b = 1,local_weigh...
method forward (line 24) | def forward(self, x):
FILE: cv-attention/MobileViTAttention.py
class PreNorm (line 6) | class PreNorm(nn.Module):
method __init__ (line 7) | def __init__(self, dim, fn):
method forward (line 12) | def forward(self, x, **kwargs):
class FeedForward (line 16) | class FeedForward(nn.Module):
method __init__ (line 17) | def __init__(self, dim, mlp_dim, dropout):
method forward (line 27) | def forward(self, x):
class Attention (line 31) | class Attention(nn.Module):
method __init__ (line 32) | def __init__(self, dim, heads, head_dim, dropout):
method forward (line 48) | def forward(self, x):
class Transformer (line 58) | class Transformer(nn.Module):
method __init__ (line 59) | def __init__(self, dim, depth, heads, head_dim, mlp_dim, dropout=0.):
method forward (line 68) | def forward(self, x):
class MobileViTAttention (line 76) | class MobileViTAttention(nn.Module):
method __init__ (line 77) | def __init__(self, in_channel=3, dim=512, kernel_size=3, patch_size=7):
method forward (line 88) | def forward(self, x):
FILE: cv-attention/ParNetAttention.py
class ParNetAttention (line 7) | class ParNetAttention(nn.Module):
method __init__ (line 9) | def __init__(self, channel=512):
method forward (line 27) | def forward(self, x):
FILE: cv-attention/PolarizedSelfAttention.py
class ParallelPolarizedSelfAttention (line 8) | class ParallelPolarizedSelfAttention(nn.Module):
method __init__ (line 10) | def __init__(self, channel=512):
method forward (line 23) | def forward(self, x):
FILE: cv-attention/S2Attention.py
function spatial_shift1 (line 7) | def spatial_shift1(x):
function spatial_shift2 (line 16) | def spatial_shift2(x):
class SplitAttention (line 25) | class SplitAttention(nn.Module):
method __init__ (line 26) | def __init__(self, channel=512, k=3):
method forward (line 35) | def forward(self, x_all):
class S2Attention (line 48) | class S2Attention(nn.Module):
method __init__ (line 50) | def __init__(self, channels=512):
method forward (line 56) | def forward(self, x):
FILE: cv-attention/SE.py
class SEAttention (line 8) | class SEAttention(nn.Module):
method __init__ (line 10) | def __init__(self, channel=512,reduction=16):
method init_weights (line 21) | def init_weights(self):
method forward (line 35) | def forward(self, x):
FILE: cv-attention/SGE.py
class SpatialGroupEnhance (line 6) | class SpatialGroupEnhance(nn.Module):
method __init__ (line 7) | def __init__(self, groups=8):
method init_weights (line 16) | def init_weights(self):
method forward (line 30) | def forward(self, x):
FILE: cv-attention/SK.py
class SKAttention (line 8) | class SKAttention(nn.Module):
method __init__ (line 10) | def __init__(self, channel=512, kernels=[1, 3, 5, 7], reduction=16, gr...
method forward (line 28) | def forward(self, x):
FILE: cv-attention/SequentialSelfAttention.py
class SequentialPolarizedSelfAttention (line 6) | class SequentialPolarizedSelfAttention(nn.Module):
method __init__ (line 8) | def __init__(self, channel=512):
method forward (line 21) | def forward(self, x):
FILE: cv-attention/ShuffleAttention.py
class ShuffleAttention (line 8) | class ShuffleAttention(nn.Module):
method __init__ (line 10) | def __init__(self, channel=512, reduction=16, G=8):
method init_weights (line 22) | def init_weights(self):
method channel_shuffle (line 37) | def channel_shuffle(x, groups):
method forward (line 47) | def forward(self, x):
FILE: cv-attention/SimAM.py
class SimAM (line 5) | class SimAM(torch.nn.Module):
method __init__ (line 6) | def __init__(self, e_lambda=1e-4):
method __repr__ (line 12) | def __repr__(self):
method get_module_name (line 18) | def get_module_name():
method forward (line 21) | def forward(self, x):
FILE: cv-attention/TripletAttention.py
class BasicConv (line 5) | class BasicConv(nn.Module):
method __init__ (line 6) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method forward (line 15) | def forward(self, x):
class ZPool (line 24) | class ZPool(nn.Module):
method forward (line 25) | def forward(self, x):
class AttentionGate (line 29) | class AttentionGate(nn.Module):
method __init__ (line 30) | def __init__(self):
method forward (line 36) | def forward(self, x):
class TripletAttention (line 43) | class TripletAttention(nn.Module):
method __init__ (line 44) | def __init__(self, no_spatial=False):
method forward (line 52) | def forward(self, x):
FILE: damo-yolo/voc2coco.py
function find_classes (line 10) | def find_classes(path):
function get (line 29) | def get(root, name):
function get_and_check (line 33) | def get_and_check(root, name, length):
function convert (line 44) | def convert(xml_list, json_file):
FILE: data-offline-aug/object_detection_data_aug.py
function parallelise (line 59) | def parallelise(function: Callable, data: List, chunksize=100, verbose=T...
function draw_detections (line 69) | def draw_detections(box, name, img):
function show_labels (line 84) | def show_labels(images_base_path, labels_base_path):
function data_aug_single (line 112) | def data_aug_single(images_name):
function data_aug (line 143) | def data_aug():
FILE: data-offline-aug/segment_data_aug.py
function generate_color_map (line 13) | def generate_color_map(num_classes):
function draw_segments (line 64) | def draw_segments(image, mask):
function show_labels (line 68) | def show_labels(images_base_path, labels_base_path):
function data_aug_single (line 89) | def data_aug_single(images_name):
function data_aug (line 116) | def data_aug():
FILE: mmdet-course/mmdet2yolo.py
function clip_boxes (line 5) | def clip_boxes(boxes, shape):
function scale_boxes (line 16) | def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
function box_iou (line 31) | def box_iou(box1, box2, eps=1e-7):
function process_batch (line 53) | def process_batch(detections, labels, iouv):
function smooth (line 77) | def smooth(y, f=0.05):
function ap_per_class (line 85) | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='....
function compute_ap (line 145) | def compute_ap(recall, precision):
function parse_opt (line 172) | def parse_opt():
FILE: mmdet-course/yolo2coco.py
function train_test_val_split_random (line 20) | def train_test_val_split_random(img_paths,ratio_train=0.8,ratio_test=0.1...
function train_test_val_split_by_files (line 29) | def train_test_val_split_by_files(img_paths, root_dir):
function yolo2coco (line 44) | def yolo2coco(arg):
FILE: objectdetection-tricks/tricks_1.py
function xywh2xyxy (line 4) | def xywh2xyxy(box):
function iou (line 11) | def iou(box1, box2):
function draw_box (line 29) | def draw_box(img, box, color):
FILE: objectdetection-tricks/tricks_12.py
function deal_yolov7_result (line 5) | def deal_yolov7_result(data_path):
FILE: objectdetection-tricks/tricks_14.py
function get_weight_size (line 12) | def get_weight_size(path):
FILE: objectdetection-tricks/tricks_15.py
function get_color_by_class (line 38) | def get_color_by_class(class_id):
function draw_detections (line 42) | def draw_detections(box, name, color, img):
function get_images_and_labels_path (line 57) | def get_images_and_labels_path(images_folder_path, labels_folder_path):
function show_dataset_info (line 84) | def show_dataset_info(image_label_dict, visual_box=False, save_path='vis...
function remap_yolo_dataset_class (line 158) | def remap_yolo_dataset_class(labels_path_list, delete_label=[0, 1, 3, 5]):
FILE: objectdetection-tricks/tricks_16.py
function get_color_by_class (line 35) | def get_color_by_class(class_id):
function draw_detections (line 39) | def draw_detections(box, name, color, img):
FILE: objectdetection-tricks/tricks_2.py
function time_synchronized (line 9) | def time_synchronized():
function autopad (line 15) | def autopad(k, p=None, d=1): # kernel, padding, dilation
class Conv2D (line 23) | class Conv2D(nn.Module):
method __init__ (line 24) | def __init__(self, inc, ouc, kernel_size, g=1):
method forward (line 31) | def forward(self, x):
method __str__ (line 34) | def __str__(self):
class DConv2D (line 37) | class DConv2D(nn.Module):
method __init__ (line 38) | def __init__(self, inc, ouc, kernel_size):
method forward (line 44) | def forward(self, x):
method __str__ (line 47) | def __str__(self):
class GhostConv2D (line 50) | class GhostConv2D(nn.Module):
method __init__ (line 51) | def __init__(self, inp, oup, kernel_size=1, ratio=2, dw_size=3):
method forward (line 60) | def forward(self, x):
method __str__ (line 66) | def __str__(self):
class GSConv (line 69) | class GSConv(nn.Module):
method __init__ (line 71) | def __init__(self, c1, c2, k=1, s=1, g=1):
method forward (line 77) | def forward(self, x):
method __str__ (line 93) | def __str__(self):
class DSConv (line 96) | class DSConv(_ConvNd):
method __init__ (line 97) | def __init__(self, in_channels, out_channels, kernel_size, block_size=...
method get_weight_res (line 125) | def get_weight_res(self):
method forward (line 165) | def forward(self, input):
class DSConv2D (line 174) | class DSConv2D(Conv2D):
method __init__ (line 175) | def __init__(self, inc, ouc, kernel_size, g=1):
method __str__ (line 179) | def __str__(self):
class Partial_conv3 (line 182) | class Partial_conv3(nn.Module):
method __init__ (line 183) | def __init__(self, dim, kernel_size, n_div=4, forward='split_cat'):
method forward_slicing (line 196) | def forward_slicing(self, x):
method forward_split_cat (line 202) | def forward_split_cat(self, x):
class PConv (line 209) | class PConv(Conv2D):
method __init__ (line 210) | def __init__(self, inc, ouc, kernel_size, g=1):
method __str__ (line 214) | def __str__(self):
class DCNV2 (line 217) | class DCNV2(nn.Module):
method __init__ (line 218) | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
method forward (line 250) | def forward(self, x):
method reset_parameters (line 272) | def reset_parameters(self):
method __str__ (line 282) | def __str__(self):
class DCNV3 (line 286) | class DCNV3(Conv2D):
method __init__ (line 287) | def __init__(self, inc, ouc, k=1, s=1, p=None, g=1, d=1, act=True):
method __str__ (line 291) | def __str__(self):
method forward (line 294) | def forward(self, x):
FILE: objectdetection-tricks/tricks_3.py
function feature_visualization (line 1) | def feature_visualization(x, module_type, stage, n=32, save_dir=Path('ru...
FILE: objectdetection-tricks/tricks_4.py
function yolo2coco (line 14) | def yolo2coco(arg):
FILE: objectdetection-tricks/tricks_5.py
function show_mask (line 6) | def show_mask(mask, ax, random_color=False):
function show_points (line 15) | def show_points(coords, labels, ax, marker_size=375):
function show_box (line 21) | def show_box(box, ax):
class Select_RoI (line 26) | class Select_RoI:
method __init__ (line 27) | def __init__(self, img) -> None:
method on_mouse (line 45) | def on_mouse(self, event, x, y, flags, param):
method clear (line 59) | def clear(self):
method confirm (line 79) | def confirm(self):
method get_result (line 87) | def get_result(self):
FILE: objectdetection-tricks/tricks_6.py
function check_version (line 2) | def check_version(current='0.0.0', minimum='0.0.0', name='version ', pin...
function set_seeds (line 9) | def set_seeds(seed=0, deterministic=False):
FILE: objectdetection-tricks/tricks_7.py
function get_weight_size (line 29) | def get_weight_size(path):
FILE: objectdetection-tricks/tricks_8.py
function get_weight_size (line 31) | def get_weight_size(path):
FILE: objectdetection-tricks/tricks_9.py
function time_synchronized (line 7) | def time_synchronized():
function fuse_conv_and_bn (line 13) | def fuse_conv_and_bn(conv, bn):
function autopad (line 42) | def autopad(k, p=None, d=1): # kernel, padding, dilation
class Conv (line 50) | class Conv(nn.Module):
method __init__ (line 55) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
method forward (line 62) | def forward(self, x):
method forward_fuse (line 66) | def forward_fuse(self, x):
class Bottleneck (line 70) | class Bottleneck(nn.Module):
method __init__ (line 73) | def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
method forward (line 83) | def forward(self, x):
class ELAN (line 89) | class ELAN(nn.Module):
method __init__ (line 90) | def __init__(self, inc, ouc, hidc, act=True):
method forward (line 99) | def forward(self, x):
method __str__ (line 107) | def __str__(self):
class C2f (line 112) | class C2f(nn.Module):
method __init__ (line 115) | def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
method forward (line 125) | def forward(self, x):
method forward_split (line 131) | def forward_split(self, x):
method __str__ (line 137) | def __str__(self):
class C3 (line 142) | class C3(nn.Module):
method __init__ (line 145) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
method forward (line 154) | def forward(self, x):
method __str__ (line 158) | def __str__(self):
class RepConvN (line 163) | class RepConvN(nn.Module):
method __init__ (line 169) | def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False...
method forward_fuse (line 181) | def forward_fuse(self, x):
method forward (line 185) | def forward(self, x):
method get_equivalent_kernel_bias (line 190) | def get_equivalent_kernel_bias(self):
method _avg_to_3x3_tensor (line 196) | def _avg_to_3x3_tensor(self, avgp):
method _pad_1x1_to_3x3_tensor (line 205) | def _pad_1x1_to_3x3_tensor(self, kernel1x1):
method _fuse_bn_tensor (line 211) | def _fuse_bn_tensor(self, branch):
method fuse_convs (line 238) | def fuse_convs(self):
class RepNBottleneck (line 263) | class RepNBottleneck(nn.Module):
method __init__ (line 265) | def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5, act=Tr...
method forward (line 272) | def forward(self, x):
class RepNCSP (line 275) | class RepNCSP(nn.Module):
method __init__ (line 277) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, act=True): ...
method forward (line 285) | def forward(self, x):
class RepNCSPELAN4 (line 288) | class RepNCSPELAN4(nn.Module):
method __init__ (line 290) | def __init__(self, c1, c2, c3, c4, c5=1, act=True): # ch_in, ch_out, ...
method forward (line 298) | def forward(self, x):
method forward_split (line 303) | def forward_split(self, x):
method __str__ (line 308) | def __str__(self):
class RepNCSPELAN4_Att (line 311) | class RepNCSPELAN4_Att(nn.Module):
method __init__ (line 313) | def __init__(self, c1, c2, c3, c4, c5=1, act=True): # ch_in, ch_out, ...
method forward (line 321) | def forward(self, x):
method forward_split (line 326) | def forward_split(self, x):
method __str__ (line 331) | def __str__(self):
FILE: yolo-gradcam/yolov11_heatmap.py
function letterbox (line 18) | def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True...
class ActivationsAndGradients (line 50) | class ActivationsAndGradients:
method __init__ (line 54) | def __init__(self, model, target_layers, reshape_transform):
method save_activation (line 68) | def save_activation(self, module, input, output):
method save_gradient (line 75) | def save_gradient(self, module, input, output):
method post_process (line 88) | def post_process(self, result):
method __call__ (line 122) | def __call__(self, x):
method release (line 142) | def release(self):
class yolo_detect_target (line 146) | class yolo_detect_target(torch.nn.Module):
method __init__ (line 147) | def __init__(self, ouput_type, conf, ratio, end2end) -> None:
method forward (line 154) | def forward(self, data):
class yolo_segment_target (line 170) | class yolo_segment_target(yolo_detect_target):
method __init__ (line 171) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 174) | def forward(self, data):
class yolo_pose_target (line 189) | class yolo_pose_target(yolo_detect_target):
method __init__ (line 190) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 193) | def forward(self, data):
class yolo_obb_target (line 208) | class yolo_obb_target(yolo_detect_target):
method __init__ (line 209) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 212) | def forward(self, data):
class yolo_classify_target (line 227) | class yolo_classify_target(yolo_detect_target):
method __init__ (line 228) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 231) | def forward(self, data):
class yolo_heatmap (line 234) | class yolo_heatmap:
method __init__ (line 235) | def __init__(self, weight, device, method, layer, backward_type, conf_...
method post_process (line 271) | def post_process(self, result):
method draw_detections (line 275) | def draw_detections(self, box, color, name, img):
method renormalize_cam_in_bounding_boxes (line 281) | def renormalize_cam_in_bounding_boxes(self, boxes, image_float_np, gra...
method process (line 293) | def process(self, img_path, save_path):
method __call__ (line 331) | def __call__(self, img_path, save_path):
function get_params (line 344) | def get_params():
FILE: yolo-gradcam/yolov5_heatmap.py
class ActivationsAndGradients (line 19) | class ActivationsAndGradients:
method __init__ (line 23) | def __init__(self, model, target_layers, reshape_transform):
method save_activation (line 37) | def save_activation(self, module, input, output):
method save_gradient (line 44) | def save_gradient(self, module, input, output):
method post_process (line 57) | def post_process(self, result):
method __call__ (line 63) | def __call__(self, x):
method release (line 70) | def release(self):
class yolov5_target (line 74) | class yolov5_target(torch.nn.Module):
method __init__ (line 75) | def __init__(self, ouput_type, conf, ratio) -> None:
method forward (line 81) | def forward(self, data):
class yolov5_heatmap (line 94) | class yolov5_heatmap:
method __init__ (line 95) | def __init__(self, weight, device, method, layer, backward_type, conf_...
method post_process (line 112) | def post_process(self, result):
method draw_detections (line 116) | def draw_detections(self, box, color, name, img):
method renormalize_cam_in_bounding_boxes (line 122) | def renormalize_cam_in_bounding_boxes(self, boxes, image_float_np, gra...
method process (line 134) | def process(self, img_path, save_path):
method __call__ (line 163) | def __call__(self, img_path, save_path):
function get_params (line 176) | def get_params():
FILE: yolo-gradcam/yolov7_heatmap.py
class ActivationsAndGradients (line 18) | class ActivationsAndGradients:
method __init__ (line 22) | def __init__(self, model, target_layers, reshape_transform):
method save_activation (line 36) | def save_activation(self, module, input, output):
method save_gradient (line 43) | def save_gradient(self, module, input, output):
method post_process (line 56) | def post_process(self, result):
method __call__ (line 68) | def __call__(self, x):
method release (line 75) | def release(self):
class yolov7_target (line 79) | class yolov7_target(torch.nn.Module):
method __init__ (line 80) | def __init__(self, ouput_type, conf, ratio) -> None:
method forward (line 86) | def forward(self, data):
class yolov7_heatmap (line 99) | class yolov7_heatmap:
method __init__ (line 100) | def __init__(self, weight, device, method, layer, backward_type, conf_...
method post_process (line 117) | def post_process(self, result):
method draw_detections (line 121) | def draw_detections(self, box, color, name, img):
method renormalize_cam_in_bounding_boxes (line 127) | def renormalize_cam_in_bounding_boxes(self, boxes, image_float_np, gra...
method process (line 139) | def process(self, img_path, save_path):
method __call__ (line 168) | def __call__(self, img_path, save_path):
function get_params (line 181) | def get_params():
FILE: yolo-gradcam/yolov8_heatmap.py
function letterbox (line 18) | def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True...
class ActivationsAndGradients (line 50) | class ActivationsAndGradients:
method __init__ (line 54) | def __init__(self, model, target_layers, reshape_transform):
method save_activation (line 68) | def save_activation(self, module, input, output):
method save_gradient (line 75) | def save_gradient(self, module, input, output):
method post_process (line 88) | def post_process(self, result):
method __call__ (line 122) | def __call__(self, x):
method release (line 142) | def release(self):
class yolo_detect_target (line 146) | class yolo_detect_target(torch.nn.Module):
method __init__ (line 147) | def __init__(self, ouput_type, conf, ratio, end2end) -> None:
method forward (line 154) | def forward(self, data):
class yolo_segment_target (line 170) | class yolo_segment_target(yolo_detect_target):
method __init__ (line 171) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 174) | def forward(self, data):
class yolo_pose_target (line 189) | class yolo_pose_target(yolo_detect_target):
method __init__ (line 190) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 193) | def forward(self, data):
class yolo_obb_target (line 208) | class yolo_obb_target(yolo_detect_target):
method __init__ (line 209) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 212) | def forward(self, data):
class yolo_classify_target (line 227) | class yolo_classify_target(yolo_detect_target):
method __init__ (line 228) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 231) | def forward(self, data):
class yolo_heatmap (line 234) | class yolo_heatmap:
method __init__ (line 235) | def __init__(self, weight, device, method, layer, backward_type, conf_...
method post_process (line 271) | def post_process(self, result):
method draw_detections (line 275) | def draw_detections(self, box, color, name, img):
method renormalize_cam_in_bounding_boxes (line 281) | def renormalize_cam_in_bounding_boxes(self, boxes, image_float_np, gra...
method process (line 293) | def process(self, img_path, save_path):
method __call__ (line 331) | def __call__(self, img_path, save_path):
function get_params (line 344) | def get_params():
FILE: yolo-gradcam/yolov9_heatmap.py
class ActivationsAndGradients (line 18) | class ActivationsAndGradients:
method __init__ (line 22) | def __init__(self, model, target_layers, reshape_transform):
method save_activation (line 36) | def save_activation(self, module, input, output):
method save_gradient (line 43) | def save_gradient(self, module, input, output):
method post_process (line 56) | def post_process(self, result):
method __call__ (line 63) | def __call__(self, x):
method release (line 70) | def release(self):
class yolov9_target (line 74) | class yolov9_target(torch.nn.Module):
method __init__ (line 75) | def __init__(self, ouput_type, conf, ratio) -> None:
method forward (line 81) | def forward(self, data):
class yolov9_heatmap (line 94) | class yolov9_heatmap:
method __init__ (line 95) | def __init__(self, weight, device, method, layer, backward_type, conf_...
method post_process (line 112) | def post_process(self, result):
method draw_detections (line 116) | def draw_detections(self, box, color, name, img):
method renormalize_cam_in_bounding_boxes (line 122) | def renormalize_cam_in_bounding_boxes(self, boxes, image_float_np, gra...
method process (line 134) | def process(self, img_path, save_path):
method __call__ (line 163) | def __call__(self, img_path, save_path):
function get_params (line 176) | def get_params():
FILE: yolo-improve/CAM.py
class CAM (line 1) | class CAM(nn.Module):
method __init__ (line 2) | def __init__(self, inc, fusion='weight'):
method forward (line 19) | def forward(self, x):
FILE: yolo-improve/iou.py
class WIoU_Scale (line 4) | class WIoU_Scale:
method __init__ (line 17) | def __init__(self, iou):
method _update (line 22) | def _update(cls, self):
method _scaled_loss (line 27) | def _scaled_loss(cls, self, gamma=1.9, delta=3):
function bbox_iou (line 38) | def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, ...
FILE: yolo-improve/ultralytics-yolo/get_COCO_metrice.py
function parse_opt (line 11) | def parse_opt():
FILE: yolo-improve/ultralytics-yolo/heatmap.py
function patch_pose_classes_for_gradcam (line 21) | def patch_pose_classes_for_gradcam():
function letterbox (line 71) | def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True...
class ActivationsAndGradients (line 103) | class ActivationsAndGradients:
method __init__ (line 107) | def __init__(self, model, target_layers, reshape_transform):
method save_activation (line 121) | def save_activation(self, module, input, output):
method save_gradient (line 128) | def save_gradient(self, module, input, output):
method post_process (line 141) | def post_process(self, result):
method __call__ (line 175) | def __call__(self, x):
method release (line 195) | def release(self):
class yolo_detect_target (line 199) | class yolo_detect_target(torch.nn.Module):
method __init__ (line 200) | def __init__(self, ouput_type, conf, ratio, end2end) -> None:
method _accumulate (line 208) | def _accumulate(acc, value):
method _zero_scalar_like (line 212) | def _zero_scalar_like(tensor):
method forward (line 216) | def forward(self, data):
class yolo_segment_target (line 230) | class yolo_segment_target(yolo_detect_target):
method __init__ (line 231) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 234) | def forward(self, data):
class yolo_pose_target (line 250) | class yolo_pose_target(yolo_detect_target):
method __init__ (line 251) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 254) | def forward(self, data):
class yolo_obb_target (line 270) | class yolo_obb_target(yolo_detect_target):
method __init__ (line 271) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 274) | def forward(self, data):
class yolo_classify_target (line 290) | class yolo_classify_target(yolo_detect_target):
method __init__ (line 291) | def __init__(self, ouput_type, conf, ratio, end2end):
method forward (line 294) | def forward(self, data):
class yolo_heatmap (line 297) | class yolo_heatmap:
method __init__ (line 298) | def __init__(self, weight, device, method, layer, backward_type, conf_...
method post_process (line 350) | def post_process(self, result):
method draw_detections (line 354) | def draw_detections(self, box, color, name, img):
method renormalize_cam_in_bounding_boxes (line 360) | def renormalize_cam_in_bounding_boxes(self, boxes, image_float_np, gra...
method process (line 372) | def process(self, img_path, save_path):
method __call__ (line 414) | def __call__(self, img_path, save_path):
function get_params (line 435) | def get_params():
FILE: yolo-improve/ultralytics-yolo/val.py
function get_weight_size (line 14) | def get_weight_size(path):
FILE: yolo-improve/ultralytics-yolo/yolo2coco.py
class YOLOtoCOCO (line 7) | class YOLOtoCOCO:
method __init__ (line 8) | def __init__(self, yolo_dir, image_dir, class_names, output_json='coco...
method create_categories (line 32) | def create_categories(self):
method yolo_to_coco_bbox (line 42) | def yolo_to_coco_bbox(self, yolo_bbox, img_width, img_height):
method bbox_to_segmentation (line 63) | def bbox_to_segmentation(self, bbox):
method process_image (line 91) | def process_image(self, image_path, label_path):
method convert (line 153) | def convert(self):
FILE: yolo-improve/yolov5-AIFI.py
class TransformerEncoderLayer (line 4) | class TransformerEncoderLayer(nn.Module):
method __init__ (line 7) | def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(...
method with_pos_embed (line 25) | def with_pos_embed(tensor, pos=None):
method forward_post (line 29) | def forward_post(self, src, src_mask=None, src_key_padding_mask=None, ...
method forward_pre (line 39) | def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, p...
method forward (line 49) | def forward(self, src, src_mask=None, src_key_padding_mask=None, pos=N...
class AIFI (line 56) | class AIFI(TransformerEncoderLayer):
method __init__ (line 59) | def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(),...
method forward (line 63) | def forward(self, x):
method build_2d_sincos_position_embedding (line 72) | def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperatur...
FILE: yolo-improve/yolov5-AUX/benchmarks.py
function run (line 52) | def run(
function test (line 114) | def test(
function parse_opt (line 146) | def parse_opt():
function main (line 163) | def main(opt):
FILE: yolo-improve/yolov5-AUX/detect.py
function run (line 54) | def run(
function parse_opt (line 219) | def parse_opt():
function main (line 254) | def main(opt):
FILE: yolo-improve/yolov5-AUX/export.py
function export_formats (line 80) | def export_formats():
function try_export (line 98) | def try_export(inner_func):
function export_torchscript (line 117) | def export_torchscript(model, im, file, optimize, prefix=colorstr('Torch...
function export_onnx (line 133) | def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colors...
function export_openvino (line 189) | def export_openvino(file, metadata, half, prefix=colorstr('OpenVINO:')):
function export_paddle (line 211) | def export_paddle(model, im, file, metadata, prefix=colorstr('PaddlePadd...
function export_coreml (line 226) | def export_coreml(model, im, file, int8, half, prefix=colorstr('CoreML:')):
function export_engine (line 249) | def export_engine(model, im, file, half, dynamic, simplify, workspace=4,...
function export_saved_model (line 311) | def export_saved_model(model,
function export_pb (line 363) | def export_pb(keras_model, file, prefix=colorstr('TensorFlow GraphDef:')):
function export_tflite (line 380) | def export_tflite(keras_model, im, file, int8, data, nms, agnostic_nms, ...
function export_edgetpu (line 411) | def export_edgetpu(file, prefix=colorstr('Edge TPU:')):
function export_tfjs (line 443) | def export_tfjs(file, int8, prefix=colorstr('TensorFlow.js:')):
function add_tflite_metadata (line 476) | def add_tflite_metadata(file, metadata, num_outputs):
function run (line 510) | def run(
function parse_opt (line 632) | def parse_opt(known=False):
function main (line 665) | def main(opt):
FILE: yolo-improve/yolov5-AUX/hubconf.py
function _create (line 16) | def _create(name, pretrained=True, channels=3, classes=80, autoshape=Tru...
function custom (line 81) | def custom(path='path/to/model.pt', autoshape=True, _verbose=True, devic...
function yolov5n (line 86) | def yolov5n(pretrained=True, channels=3, classes=80, autoshape=True, _ve...
function yolov5s (line 91) | def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, _ve...
function yolov5m (line 96) | def yolov5m(pretrained=True, channels=3, classes=80, autoshape=True, _ve...
function yolov5l (line 101) | def yolov5l(pretrained=True, channels=3, classes=80, autoshape=True, _ve...
function yolov5x (line 106) | def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, _ve...
function yolov5n6 (line 111) | def yolov5n6(pretrained=True, channels=3, classes=80, autoshape=True, _v...
function yolov5s6 (line 116) | def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, _v...
function yolov5m6 (line 121) | def yolov5m6(pretrained=True, channels=3, classes=80, autoshape=True, _v...
function yolov5l6 (line 126) | def yolov5l6(pretrained=True, channels=3, classes=80, autoshape=True, _v...
function yolov5x6 (line 131) | def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, _v...
FILE: yolo-improve/yolov5-AUX/models/common.py
function autopad (line 37) | def autopad(k, p=None, d=1): # kernel, padding, dilation
class Conv (line 46) | class Conv(nn.Module):
method __init__ (line 50) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
method forward (line 56) | def forward(self, x):
method forward_fuse (line 59) | def forward_fuse(self, x):
class DWConv (line 63) | class DWConv(Conv):
method __init__ (line 65) | def __init__(self, c1, c2, k=1, s=1, d=1, act=True): # ch_in, ch_out,...
class DWConvTranspose2d (line 69) | class DWConvTranspose2d(nn.ConvTranspose2d):
method __init__ (line 71) | def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0): # ch_in, ch_out, ke...
class TransformerLayer (line 75) | class TransformerLayer(nn.Module):
method __init__ (line 77) | def __init__(self, c, num_heads):
method forward (line 86) | def forward(self, x):
class TransformerBlock (line 92) | class TransformerBlock(nn.Module):
method __init__ (line 94) | def __init__(self, c1, c2, num_heads, num_layers):
method forward (line 103) | def forward(self, x):
class Bottleneck (line 111) | class Bottleneck(nn.Module):
method __init__ (line 113) | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_ou...
method forward (line 120) | def forward(self, x):
class BottleneckCSP (line 124) | class BottleneckCSP(nn.Module):
method __init__ (line 126) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
method forward (line 137) | def forward(self, x):
class CrossConv (line 143) | class CrossConv(nn.Module):
method __init__ (line 145) | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
method forward (line 153) | def forward(self, x):
class C3 (line 157) | class C3(nn.Module):
method __init__ (line 159) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
method forward (line 167) | def forward(self, x):
class C3x (line 171) | class C3x(C3):
method __init__ (line 173) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class C3TR (line 179) | class C3TR(C3):
method __init__ (line 181) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class C3SPP (line 187) | class C3SPP(C3):
method __init__ (line 189) | def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
class C3Ghost (line 195) | class C3Ghost(C3):
method __init__ (line 197) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
class SPP (line 203) | class SPP(nn.Module):
method __init__ (line 205) | def __init__(self, c1, c2, k=(5, 9, 13)):
method forward (line 212) | def forward(self, x):
class SPPF (line 219) | class SPPF(nn.Module):
method __init__ (line 221) | def __init__(self, c1, c2, k=5): # equivalent to SPP(k=(5, 9, 13))
method forward (line 228) | def forward(self, x):
class Focus (line 237) | class Focus(nn.Module):
method __init__ (line 239) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in,...
method forward (line 244) | def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
class GhostConv (line 249) | class GhostConv(nn.Module):
method __init__ (line 251) | def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out,...
method forward (line 257) | def forward(self, x):
class GhostBottleneck (line 262) | class GhostBottleneck(nn.Module):
method __init__ (line 264) | def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
method forward (line 274) | def forward(self, x):
class Contract (line 278) | class Contract(nn.Module):
method __init__ (line 280) | def __init__(self, gain=2):
method forward (line 284) | def forward(self, x):
class Expand (line 292) | class Expand(nn.Module):
method __init__ (line 294) | def __init__(self, gain=2):
method forward (line 298) | def forward(self, x):
class Concat (line 306) | class Concat(nn.Module):
method __init__ (line 308) | def __init__(self, dimension=1):
method forward (line 312) | def forward(self, x):
class DetectMultiBackend (line 316) | class DetectMultiBackend(nn.Module):
method __init__ (line 318) | def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), d...
method forward (line 506) | def forward(self, im, augment=False, visualize=False):
method from_numpy (line 588) | def from_numpy(self, x):
method warmup (line 591) | def warmup(self, imgsz=(1, 3, 640, 640)):
method _model_type (line 600) | def _model_type(p='path/to/model.pt'):
method _load_metadata (line 615) | def _load_metadata(f=Path('path/to/meta.yaml')):
class AutoShape (line 623) | class AutoShape(nn.Module):
method __init__ (line 633) | def __init__(self, model, verbose=True):
method _apply (line 646) | def _apply(self, fn):
method forward (line 658) | def forward(self, ims, size=640, augment=False, profile=False):
class Detections (line 722) | class Detections:
method __init__ (line 724) | def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shap...
method _run (line 741) | def _run(self, pprint=False, show=False, save=False, crop=False, rende...
method show (line 787) | def show(self, labels=True):
method save (line 790) | def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
method crop (line 794) | def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
method render (line 798) | def render(self, labels=True):
method pandas (line 802) | def pandas(self):
method tolist (line 812) | def tolist(self):
method print (line 821) | def print(self):
method __len__ (line 824) | def __len__(self): # override len(results)
method __str__ (line 827) | def __str__(self): # override print(results)
method __repr__ (line 830) | def __repr__(self):
class Proto (line 834) | class Proto(nn.Module):
method __init__ (line 836) | def __init__(self, c1, c_=256, c2=32): # ch_in, number of protos, num...
method forward (line 843) | def forward(self, x):
class Classify (line 847) | class Classify(nn.Module):
method __init__ (line 849) | def __init__(self,
method forward (line 864) | def forward(self, x):
FILE: yolo-improve/yolov5-AUX/models/experimental.py
class Sum (line 14) | class Sum(nn.Module):
method __init__ (line 16) | def __init__(self, n, weight=False): # n: number of inputs
method forward (line 23) | def forward(self, x):
class MixConv2d (line 35) | class MixConv2d(nn.Module):
method __init__ (line 37) | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): # ch_in, ch...
method forward (line 56) | def forward(self, x):
class Ensemble (line 60) | class Ensemble(nn.ModuleList):
method __init__ (line 62) | def __init__(self):
method forward (line 65) | def forward(self, x, augment=False, profile=False, visualize=False):
function attempt_load (line 73) | def attempt_load(weights, device=None, inplace=True, fuse=True):
FILE: yolo-improve/yolov5-AUX/models/tf.py
class TFBN (line 38) | class TFBN(keras.layers.Layer):
method __init__ (line 40) | def __init__(self, w=None):
method call (line 49) | def call(self, inputs):
class TFPad (line 53) | class TFPad(keras.layers.Layer):
method __init__ (line 55) | def __init__(self, pad):
method call (line 62) | def call(self, inputs):
class TFConv (line 66) | class TFConv(keras.layers.Layer):
method __init__ (line 68) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
method call (line 86) | def call(self, inputs):
class TFDWConv (line 90) | class TFDWConv(keras.layers.Layer):
method __init__ (line 92) | def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
method call (line 108) | def call(self, inputs):
class TFDWConvTranspose2d (line 112) | class TFDWConvTranspose2d(keras.layers.Layer):
method __init__ (line 114) | def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
method call (line 131) | def call(self, inputs):
class TFFocus (line 135) | class TFFocus(keras.layers.Layer):
method __init__ (line 137) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
method call (line 142) | def call(self, inputs): # x(b,w,h,c) -> y(b,w/2,h/2,4c)
class TFBottleneck (line 148) | class TFBottleneck(keras.layers.Layer):
method __init__ (line 150) | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None): # ch_i...
method call (line 157) | def call(self, inputs):
class TFCrossConv (line 161) | class TFCrossConv(keras.layers.Layer):
method __init__ (line 163) | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
method call (line 170) | def call(self, inputs):
class TFConv2d (line 174) | class TFConv2d(keras.layers.Layer):
method __init__ (line 176) | def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
method call (line 188) | def call(self, inputs):
class TFBottleneckCSP (line 192) | class TFBottleneckCSP(keras.layers.Layer):
method __init__ (line 194) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
method call (line 206) | def call(self, inputs):
class TFC3 (line 212) | class TFC3(keras.layers.Layer):
method __init__ (line 214) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
method call (line 223) | def call(self, inputs):
class TFC3x (line 227) | class TFC3x(keras.layers.Layer):
method __init__ (line 229) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
method call (line 239) | def call(self, inputs):
class TFSPP (line 243) | class TFSPP(keras.layers.Layer):
method __init__ (line 245) | def __init__(self, c1, c2, k=(5, 9, 13), w=None):
method call (line 252) | def call(self, inputs):
class TFSPPF (line 257) | class TFSPPF(keras.layers.Layer):
method __init__ (line 259) | def __init__(self, c1, c2, k=5, w=None):
method call (line 266) | def call(self, inputs):
class TFDetect (line 273) | class TFDetect(keras.layers.Layer):
method __init__ (line 275) | def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None)...
method call (line 292) | def call(self, inputs):
method _make_grid (line 316) | def _make_grid(nx=20, ny=20):
class TFSegment (line 323) | class TFSegment(TFDetect):
method __init__ (line 325) | def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(64...
method call (line 334) | def call(self, x):
class TFProto (line 342) | class TFProto(keras.layers.Layer):
method __init__ (line 344) | def __init__(self, c1, c_=256, c2=32, w=None):
method call (line 351) | def call(self, inputs):
class TFUpsample (line 355) | class TFUpsample(keras.layers.Layer):
method __init__ (line 357) | def __init__(self, size, scale_factor, mode, w=None): # warning: all ...
method call (line 366) | def call(self, inputs):
class TFConcat (line 370) | class TFConcat(keras.layers.Layer):
method __init__ (line 372) | def __init__(self, dimension=1, w=None):
method call (line 377) | def call(self, inputs):
function parse_model (line 381) | def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
class TFModel (line 437) | class TFModel:
method __init__ (line 439) | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgs...
method predict (line 455) | def predict(self,
method _xywh2xyxy (line 498) | def _xywh2xyxy(xywh):
class AgnosticNMS (line 504) | class AgnosticNMS(keras.layers.Layer):
method call (line 506) | def call(self, input, topk_all, iou_thres, conf_thres):
method _nms (line 514) | def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25): # agnosti...
function activations (line 542) | def activations(act=nn.SiLU):
function representative_dataset_gen (line 554) | def representative_dataset_gen(dataset, ncalib=100):
function run (line 565) | def run(
function parse_opt (line 590) | def parse_opt():
function main (line 602) | def main(opt):
FILE: yolo-improve/yolov5-AUX/models/yolo.py
class Detect (line 38) | class Detect(nn.Module):
method __init__ (line 44) | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detecti...
method forward (line 57) | def forward(self, x):
method _make_grid (line 86) | def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch...
class Segment (line 97) | class Segment(Detect):
method __init__ (line 99) | def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=T...
method forward (line 108) | def forward(self, x):
class BaseModel (line 114) | class BaseModel(nn.Module):
method forward (line 116) | def forward(self, x, profile=False, visualize=False):
method _forward_once (line 119) | def _forward_once(self, x, profile=False, visualize=False):
method _profile_one_layer (line 132) | def _profile_one_layer(self, m, x, dt):
method fuse (line 145) | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
method info (line 155) | def info(self, verbose=False, img_size=640): # print model information
method _apply (line 158) | def _apply(self, fn):
class DetectionModel (line 170) | class DetectionModel(BaseModel):
method __init__ (line 172) | def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None): ...
method forward (line 211) | def forward(self, x, augment=False, profile=False, visualize=False):
method _forward_augment (line 216) | def _forward_augment(self, x):
method _descale_pred (line 230) | def _descale_pred(self, p, flips, scale, img_size):
method _clip_augmented (line 247) | def _clip_augmented(self, y):
method _initialize_biases (line 258) | def _initialize_biases(self, cf=None): # initialize biases into Detec...
class SegmentationModel (line 272) | class SegmentationModel(DetectionModel):
method __init__ (line 274) | def __init__(self, cfg='yolov5s-seg.yaml', ch=3, nc=None, anchors=None):
class ClassificationModel (line 278) | class ClassificationModel(BaseModel):
method __init__ (line 280) | def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): # yaml,...
method _from_detection_model (line 284) | def _from_detection_model(self, model, nc=1000, cutoff=10):
method _from_yaml (line 299) | def _from_yaml(self, cfg):
function parse_model (line 304) | def parse_model(d, ch): # model_dict, input_channels(3)
FILE: yolo-improve/yolov5-AUX/train.py
function train (line 70) | def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hy...
function parse_opt (line 436) | def parse_opt(known=False):
function main (line 482) | def main(opt, callbacks=Callbacks()):
function run (line 630) | def run(**kwargs):
FILE: yolo-improve/yolov5-AUX/utils/__init__.py
function emojis (line 11) | def emojis(str=''):
class TryExcept (line 16) | class TryExcept(contextlib.ContextDecorator):
method __init__ (line 18) | def __init__(self, msg=''):
method __enter__ (line 21) | def __enter__(self):
method __exit__ (line 24) | def __exit__(self, exc_type, value, traceback):
function threaded (line 30) | def threaded(func):
function join_threads (line 40) | def join_threads(verbose=False):
function notebook_init (line 50) | def notebook_init(verbose=True):
FILE: yolo-improve/yolov5-AUX/utils/activations.py
class SiLU (line 11) | class SiLU(nn.Module):
method forward (line 14) | def forward(x):
class Hardswish (line 18) | class Hardswish(nn.Module):
method forward (line 21) | def forward(x):
class Mish (line 26) | class Mish(nn.Module):
method forward (line 29) | def forward(x):
class MemoryEfficientMish (line 33) | class MemoryEfficientMish(nn.Module):
class F (line 35) | class F(torch.autograd.Function):
method forward (line 38) | def forward(ctx, x):
method backward (line 43) | def backward(ctx, grad_output):
method forward (line 49) | def forward(self, x):
class FReLU (line 53) | class FReLU(nn.Module):
method __init__ (line 55) | def __init__(self, c1, k=3): # ch_in, kernel
method forward (line 60) | def forward(self, x):
class AconC (line 64) | class AconC(nn.Module):
method __init__ (line 70) | def __init__(self, c1):
method forward (line 76) | def forward(self, x):
class MetaAconC (line 81) | class MetaAconC(nn.Module):
method __init__ (line 87) | def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r
method forward (line 97) | def forward(self, x):
FILE: yolo-improve/yolov5-AUX/utils/augmentations.py
class Albumentations (line 22) | class Albumentations:
method __init__ (line 24) | def __init__(self, size=640):
method __call__ (line 48) | def __call__(self, im, labels, p=1.0):
function normalize (line 55) | def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
function denormalize (line 60) | def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
function augment_hsv (line 67) | def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
function hist_equalize (line 83) | def hist_equalize(im, clahe=True, bgr=False):
function replicate (line 94) | def replicate(im, labels):
function letterbox (line 111) | def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True...
function random_perspective (line 144) | def random_perspective(im,
function copy_paste (line 240) | def copy_paste(im, labels, segments, p=0.5):
function cutout (line 262) | def cutout(im, labels, p=0.5):
function mixup (line 289) | def mixup(im, labels, im2, labels2):
function box_candidates (line 297) | def box_candidates(box1, box2, wh_thr=2, ar_thr=100, area_thr=0.1, eps=1...
function classify_albumentations (line 305) | def classify_albumentations(
function classify_transforms (line 347) | def classify_transforms(size=224):
class LetterBox (line 354) | class LetterBox:
method __init__ (line 356) | def __init__(self, size=(640, 640), auto=False, stride=32):
method __call__ (line 362) | def __call__(self, im): # im = np.array HWC
class CenterCrop (line 373) | class CenterCrop:
method __init__ (line 375) | def __init__(self, size=640):
method __call__ (line 379) | def __call__(self, im): # im = np.array HWC
class ToTensor (line 386) | class ToTensor:
method __init__ (line 388) | def __init__(self, half=False):
method __call__ (line 392) | def __call__(self, im): # im = np.array HWC in BGR order
FILE: yolo-improve/yolov5-AUX/utils/autoanchor.py
function check_anchor_order (line 19) | def check_anchor_order(m):
function check_anchors (line 30) | def check_anchors(dataset, model, thr=4.0, imgsz=640):
function kmean_anchors (line 67) | def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=...
FILE: yolo-improve/yolov5-AUX/utils/autobatch.py
function check_train_batch_size (line 15) | def check_train_batch_size(model, imgsz=640, amp=True):
function autobatch (line 21) | def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
FILE: yolo-improve/yolov5-AUX/utils/callbacks.py
class Callbacks (line 9) | class Callbacks:
method __init__ (line 14) | def __init__(self):
method register_action (line 38) | def register_action(self, hook, name='', callback=None):
method get_registered_actions (line 51) | def get_registered_actions(self, hook=None):
method run (line 60) | def run(self, hook, *args, thread=False, **kwargs):
FILE: yolo-improve/yolov5-AUX/utils/dataloaders.py
function get_hash (line 52) | def get_hash(paths):
function exif_size (line 60) | def exif_size(img):
function exif_transpose (line 70) | def exif_transpose(image):
function seed_worker (line 96) | def seed_worker(worker_id):
function create_dataloader (line 103) | def create_dataloader(path,
class InfiniteDataLoader (line 156) | class InfiniteDataLoader(dataloader.DataLoader):
method __init__ (line 162) | def __init__(self, *args, **kwargs):
method __len__ (line 167) | def __len__(self):
method __iter__ (line 170) | def __iter__(self):
class _RepeatSampler (line 175) | class _RepeatSampler:
method __init__ (line 182) | def __init__(self, sampler):
method __iter__ (line 185) | def __iter__(self):
class LoadScreenshots (line 190) | class LoadScreenshots:
method __init__ (line 192) | def __init__(self, source, img_size=640, stride=32, auto=True, transfo...
method __iter__ (line 221) | def __iter__(self):
method __next__ (line 224) | def __next__(self):
class LoadImages (line 239) | class LoadImages:
method __init__ (line 241) | def __init__(self, path, img_size=640, stride=32, auto=True, transform...
method __iter__ (line 276) | def __iter__(self):
method __next__ (line 280) | def __next__(self):
method _new_video (line 320) | def _new_video(self, path):
method _cv2_rotate (line 328) | def _cv2_rotate(self, im):
method __len__ (line 338) | def __len__(self):
class LoadStreams (line 342) | class LoadStreams:
method __init__ (line 344) | def __init__(self, sources='file.streams', img_size=640, stride=32, au...
method update (line 388) | def update(self, i, cap, stream):
method __iter__ (line 404) | def __iter__(self):
method __next__ (line 408) | def __next__(self):
method __len__ (line 424) | def __len__(self):
function img2label_paths (line 428) | def img2label_paths(img_paths):
class LoadImagesAndLabels (line 434) | class LoadImagesAndLabels(Dataset):
method __init__ (line 439) | def __init__(self,
method check_cache_ram (line 589) | def check_cache_ram(self, safety_margin=0.1, prefix=''):
method cache_labels (line 606) | def cache_labels(self, path=Path('./labels.cache'), prefix=''):
method __len__ (line 644) | def __len__(self):
method __getitem__ (line 653) | def __getitem__(self, index):
method load_image (line 727) | def load_image(self, i):
method cache_images_to_disk (line 744) | def cache_images_to_disk(self, i):
method load_mosaic (line 750) | def load_mosaic(self, index):
method load_mosaic9 (line 808) | def load_mosaic9(self, index):
method collate_fn (line 886) | def collate_fn(batch):
method collate_fn4 (line 893) | def collate_fn4(batch):
function flatten_recursive (line 920) | def flatten_recursive(path=DATASETS_DIR / 'coco128'):
function extract_boxes (line 930) | def extract_boxes(path=DATASETS_DIR / 'coco128'): # from utils.dataload...
function autosplit (line 964) | def autosplit(path=DATASETS_DIR / 'coco128/images', weights=(0.9, 0.1, 0...
function verify_image_label (line 990) | def verify_image_label(args):
class HUBDatasetStats (line 1042) | class HUBDatasetStats():
method __init__ (line 1057) | def __init__(self, path='coco128.yaml', autodownload=False):
method _find_yaml (line 1076) | def _find_yaml(dir):
method _unzip (line 1086) | def _unzip(self, path):
method _hub_ops (line 1096) | def _hub_ops(self, f, max_dim=1920):
method get_json (line 1114) | def get_json(self, save=False, verbose=False):
method process_images (line 1149) | def process_images(self):
class ClassificationDataset (line 1163) | class ClassificationDataset(torchvision.datasets.ImageFolder):
method __init__ (line 1172) | def __init__(self, root, augment, imgsz, cache=False):
method __getitem__ (line 1180) | def __getitem__(self, i):
function create_classification_dataloader (line 1197) | def create_classification_dataloader(path,
FILE: yolo-improve/yolov5-AUX/utils/downloads.py
function is_url (line 16) | def is_url(url, check=True):
function gsutil_getsize (line 27) | def gsutil_getsize(url=''):
function url_getsize (line 35) | def url_getsize(url='https://ultralytics.com/images/bus.jpg'):
function curl_download (line 41) | def curl_download(url, filename, *, silent: bool = False) -> bool:
function safe_download (line 60) | def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
function attempt_download (line 84) | def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'):
FILE: yolo-improve/yolov5-AUX/utils/flask_rest_api/restapi.py
function predict (line 20) | def predict(model):
FILE: yolo-improve/yolov5-AUX/utils/general.py
function is_ascii (line 64) | def is_ascii(s=''):
function is_chinese (line 70) | def is_chinese(s='人工智能'):
function is_colab (line 75) | def is_colab():
function is_notebook (line 80) | def is_notebook():
function is_kaggle (line 86) | def is_kaggle():
function is_docker (line 91) | def is_docker() -> bool:
function is_writeable (line 102) | def is_writeable(dir, test=False):
function set_logging (line 119) | def set_logging(name=LOGGING_NAME, verbose=True):
function user_config_dir (line 148) | def user_config_dir(dir='Ultralytics', env_var='YOLOV5_CONFIG_DIR'):
class Profile (line 164) | class Profile(contextlib.ContextDecorator):
method __init__ (line 166) | def __init__(self, t=0.0):
method __enter__ (line 170) | def __enter__(self):
method __exit__ (line 174) | def __exit__(self, type, value, traceback):
method time (line 178) | def time(self):
class Timeout (line 184) | class Timeout(contextlib.ContextDecorator):
method __init__ (line 186) | def __init__(self, seconds, *, timeout_msg='', suppress_timeout_errors...
method _timeout_handler (line 191) | def _timeout_handler(self, signum, frame):
method __enter__ (line 194) | def __enter__(self):
method __exit__ (line 199) | def __exit__(self, exc_type, exc_val, exc_tb):
class WorkingDirectory (line 206) | class WorkingDirectory(contextlib.ContextDecorator):
method __init__ (line 208) | def __init__(self, new_dir):
method __enter__ (line 212) | def __enter__(self):
method __exit__ (line 215) | def __exit__(self, exc_type, exc_val, exc_tb):
function methods (line 219) | def methods(instance):
function print_args (line 224) | def print_args(args: Optional[dict] = None, show_file=True, show_func=Fa...
function init_seeds (line 239) | def init_seeds(seed=0, deterministic=False):
function intersect_dicts (line 254) | def intersect_dicts(da, db, exclude=()):
function get_default_args (line 259) | def get_default_args(func):
function get_latest_run (line 265) | def get_latest_run(search_dir='.'):
function file_age (line 271) | def file_age(path=__file__):
function file_date (line 277) | def file_date(path=__file__):
function file_size (line 283) | def file_size(path):
function check_online (line 295) | def check_online():
function git_describe (line 310) | def git_describe(path=ROOT): # path must be a directory
function check_git_status (line 321) | def check_git_status(repo='ultralytics/yolov5', branch='master'):
function check_git_info (line 348) | def check_git_info(path='.'):
function check_python (line 365) | def check_python(minimum='3.7.0'):
function check_version (line 370) | def check_version(current='0.0.0', minimum='0.0.0', name='version ', pin...
function check_requirements (line 383) | def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(...
function check_img_size (line 417) | def check_img_size(imgsz, s=32, floor=0):
function check_imshow (line 429) | def check_imshow(warn=False):
function check_suffix (line 445) | def check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):
function check_yaml (line 456) | def check_yaml(file, suffix=('.yaml', '.yml')):
function check_file (line 461) | def check_file(file, suffix=''):
function check_font (line 489) | def check_font(font=FONT, progress=False):
function check_dataset (line 499) | def check_dataset(data, autodownload=True):
function check_amp (line 565) | def check_amp(model):
function yaml_load (line 593) | def yaml_load(file='data.yaml'):
function yaml_save (line 599) | def yaml_save(file='data.yaml', data={}):
function unzip_file (line 605) | def unzip_file(file, path=None, exclude=('.DS_Store', '__MACOSX')):
function url2file (line 615) | def url2file(url):
function download (line 621) | def download(url, dir='.', unzip=True, delete=True, curl=False, threads=...
function make_divisible (line 667) | def make_divisible(x, divisor):
function clean_str (line 674) | def clean_str(s):
function one_cycle (line 679) | def one_cycle(y1=0.0, y2=1.0, steps=100):
function colorstr (line 684) | def colorstr(*input):
function labels_to_class_weights (line 710) | def labels_to_class_weights(labels, nc=80):
function labels_to_image_weights (line 729) | def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
function coco80_to_coco91_class (line 736) | def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index...
function xyxy2xywh (line 748) | def xyxy2xywh(x):
function xywh2xyxy (line 758) | def xywh2xyxy(x):
function xywhn2xyxy (line 768) | def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
function xyxy2xywhn (line 778) | def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
function xyn2xy (line 790) | def xyn2xy(x, w=640, h=640, padw=0, padh=0):
function segment2box (line 798) | def segment2box(segment, width=640, height=640):
function segments2boxes (line 806) | def segments2boxes(segments):
function resample_segments (line 815) | def resample_segments(segments, n=1000):
function scale_boxes (line 825) | def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
function scale_segments (line 841) | def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, nor...
function clip_boxes (line 860) | def clip_boxes(boxes, shape):
function clip_segments (line 872) | def clip_segments(segments, shape):
function non_max_suppression (line 882) | def non_max_suppression(
function strip_optimizer (line 995) | def strip_optimizer(f='best.pt', s=''): # from utils.general import *; ...
function print_mutation (line 1011) | def print_mutation(keys, results, hyp, save_dir, bucket, prefix=colorstr...
function apply_classifier (line 1050) | def apply_classifier(x, model, img, im0):
function increment_path (line 1085) | def increment_path(path, exist_ok=False, sep='', mkdir=False):
function imread (line 1115) | def imread(path, flags=cv2.IMREAD_COLOR):
function imwrite (line 1119) | def imwrite(path, im):
function imshow (line 1127) | def imshow(path, im):
FILE: yolo-improve/yolov5-AUX/utils/loggers/__init__.py
class Loggers (line 57) | class Loggers():
method __init__ (line 59) | def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, lo...
method remote_dataset (line 135) | def remote_dataset(self):
method on_train_start (line 147) | def on_train_start(self):
method on_pretrain_routine_start (line 151) | def on_pretrain_routine_start(self):
method on_pretrain_routine_end (line 155) | def on_pretrain_routine_end(self, labels, names):
method on_train_batch_end (line 167) | def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
method on_train_epoch_end (line 187) | def on_train_epoch_end(self, epoch):
method on_val_start (line 195) | def on_val_start(self):
method on_val_image_end (line 199) | def on_val_image_end(self, pred, predn, path, names, im):
method on_val_batch_end (line 206) | def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
method on_val_end (line 210) | def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusi...
method on_fit_epoch_end (line 222) | def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
method on_model_save (line 255) | def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
method on_train_end (line 268) | def on_train_end(self, last, best, epoch, results):
method on_params_update (line 300) | def on_params_update(self, params: dict):
class GenericLogger (line 308) | class GenericLogger:
method __init__ (line 318) | def __init__(self, opt, console_logger, include=('tb', 'wandb')):
method log_metrics (line 337) | def log_metrics(self, metrics, epoch):
method log_images (line 353) | def log_images(self, files, name='Images', epoch=0):
method log_graph (line 365) | def log_graph(self, model, imgsz=(640, 640)):
method log_model (line 370) | def log_model(self, model_path, epoch=0, metadata={}):
method update_params (line 377) | def update_params(self, params):
function log_tensorboard_graph (line 383) | def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
function web_project_name (line 396) | def web_project_name(project):
FILE: yolo-improve/yolov5-AUX/utils/loggers/clearml/clearml_utils.py
function construct_dataset (line 20) | def construct_dataset(clearml_info_string):
class ClearmlLogger (line 55) | class ClearmlLogger:
method __init__ (line 66) | def __init__(self, opt, hyp):
method log_debug_samples (line 116) | def log_debug_samples(self, files, title='Debug Samples'):
method log_image_with_boxes (line 133) | def log_image_with_boxes(self, image_path, boxes, class_names, image, ...
FILE: yolo-improve/yolov5-AUX/utils/loggers/comet/__init__.py
class CometLogger (line 64) | class CometLogger:
method __init__ (line 69) | def __init__(self, opt, hyp, run_id=None, job_type='Training', **exper...
method _get_experiment (line 164) | def _get_experiment(self, mode, experiment_id=None):
method log_metrics (line 193) | def log_metrics(self, log_dict, **kwargs):
method log_parameters (line 196) | def log_parameters(self, log_dict, **kwargs):
method log_asset (line 199) | def log_asset(self, asset_path, **kwargs):
method log_asset_data (line 202) | def log_asset_data(self, asset, **kwargs):
method log_image (line 205) | def log_image(self, img, **kwargs):
method log_model (line 208) | def log_model(self, path, opt, epoch, fitness_score, best_model=False):
method check_dataset (line 230) | def check_dataset(self, data_file):
method log_predictions (line 244) | def log_predictions(self, image, labelsn, path, shape, predn):
method preprocess_prediction (line 288) | def preprocess_prediction(self, image, labels, shape, pred):
method add_assets_to_artifact (line 307) | def add_assets_to_artifact(self, artifact, path, asset_path, split):
method upload_dataset_artifact (line 324) | def upload_dataset_artifact(self):
method download_dataset_artifact (line 348) | def download_dataset_artifact(self, artifact_path):
method update_data_paths (line 368) | def update_data_paths(self, data_dict):
method on_pretrain_routine_end (line 379) | def on_pretrain_routine_end(self, paths):
method on_train_start (line 392) | def on_train_start(self):
method on_train_epoch_start (line 395) | def on_train_epoch_start(self):
method on_train_epoch_end (line 398) | def on_train_epoch_end(self, epoch):
method on_train_batch_start (line 403) | def on_train_batch_start(self):
method on_train_batch_end (line 406) | def on_train_batch_end(self, log_dict, step):
method on_train_end (line 413) | def on_train_end(self, files, save_dir, last, best, epoch, results):
method on_val_start (line 440) | def on_val_start(self):
method on_val_batch_start (line 443) | def on_val_batch_start(self):
method on_val_batch_end (line 446) | def on_val_batch_end(self, batch_i, images, targets, paths, shapes, ou...
method on_val_end (line 464) | def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusi...
method on_fit_epoch_end (line 497) | def on_fit_epoch_end(self, result, epoch):
method on_model_save (line 500) | def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
method on_params_update (line 504) | def on_params_update(self, params):
method finish_run (line 507) | def finish_run(self):
FILE: yolo-improve/yolov5-AUX/utils/loggers/comet/comet_utils.py
function download_model_checkpoint (line 19) | def download_model_checkpoint(opt, experiment):
function set_opt_parameters (line 66) | def set_opt_parameters(opt, experiment):
function check_comet_weights (line 97) | def check_comet_weights(opt):
function check_comet_resume (line 124) | def check_comet_resume(opt):
FILE: yolo-improve/yolov5-AUX/utils/loggers/comet/hpo.py
function get_args (line 27) | def get_args(known=False):
function run (line 83) | def run(parameters, opt):
FILE: yolo-improve/yolov5-AUX/utils/loss.py
function smooth_BCE (line 15) | def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues...
class BCEBlurWithLogitsLoss (line 20) | class BCEBlurWithLogitsLoss(nn.Module):
method __init__ (line 22) | def __init__(self, alpha=0.05):
method forward (line 27) | def forward(self, pred, true):
class FocalLoss (line 37) | class FocalLoss(nn.Module):
method __init__ (line 39) | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
method forward (line 47) | def forward(self, pred, true):
class QFocalLoss (line 67) | class QFocalLoss(nn.Module):
method __init__ (line 69) | def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
method forward (line 77) | def forward(self, pred, true):
class ComputeLoss (line 93) | class ComputeLoss:
method __init__ (line 97) | def __init__(self, model, autobalance=False):
method __call__ (line 123) | def __call__(self, p, targets): # predictions, targets
method build_targets (line 179) | def build_targets(self, p, targets):
class ComputeLossAuxOTA (line 238) | class ComputeLossAuxOTA:
method __init__ (line 240) | def __init__(self, model, autobalance=False):
method __call__ (line 264) | def __call__(self, p, targets, imgs): # predictions, targets, model
method build_targets (line 349) | def build_targets(self, p, targets, imgs):
method build_targets2 (line 502) | def build_targets2(self, p, targets, imgs):
method find_5_positive (line 655) | def find_5_positive(self, p, targets):
method find_3_positive (line 708) | def find_3_positive(self, p, targets):
FILE: yolo-improve/yolov5-AUX/utils/metrics.py
function fitness (line 17) | def fitness(x):
function smooth (line 23) | def smooth(y, f=0.05):
function ap_per_class (line 31) | def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='....
function compute_ap (line 98) | def compute_ap(recall, precision):
class ConfusionMatrix (line 126) | class ConfusionMatrix:
method __init__ (line 128) | def __init__(self, nc, conf=0.25, iou_thres=0.45):
method process_batch (line 134) | def process_batch(self, detections, labels):
method tp_fp (line 180) | def tp_fp(self):
method plot (line 187) | def plot(self, normalize=True, save_dir='', names=()):
method print (line 217) | def print(self):
function bbox_iou (line 222) | def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, ...
function box_iou (line 263) | def box_iou(box1, box2, eps=1e-7):
function bbox_ioa (line 284) | def bbox_ioa(box1, box2, eps=1e-7):
function wh_iou (line 306) | def wh_iou(wh1, wh2, eps=1e-7):
function plot_pr_curve (line 318) | def plot_pr_curve(px, py, ap, save_dir=Path('pr_curve.png'), names=()):
function plot_mc_curve (line 341) | def plot_mc_curve(px, py, save_dir=Path('mc_curve.png'), names=(), xlabe...
FILE: yolo-improve/yolov5-AUX/utils/plots.py
class Colors (line 34) | class Colors:
method __init__ (line 36) | def __init__(self):
method __call__ (line 43) | def __call__(self, i, bgr=False):
method hex2rgb (line 48) | def hex2rgb(h): # rgb order (PIL)
function check_pil_font (line 55) | def check_pil_font(font=FONT, size=10):
class Annotator (line 71) | class Annotator:
method __init__ (line 73) | def __init__(self, im, line_width=None, font_size=None, font='Arial.tt...
method box_label (line 86) | def box_label(self, box, label='', color=(128, 128, 128), txt_color=(2...
method masks (line 118) | def masks(self, masks, colors, im_gpu, alpha=0.5, retina_masks=False):
method rectangle (line 148) | def rectangle(self, xy, fill=None, outline=None, width=1):
method text (line 152) | def text(self, xy, text, txt_color=(255, 255, 255), anchor='top'):
method fromarray (line 159) | def fromarray(self, im):
method result (line 164) | def result(self):
function feature_visualization (line 169) | def feature_visualization(x, module_type, stage, n=32, save_dir=Path('ru...
function hist2d (line 197) | def hist2d(x, y, n=100):
function butter_lowpass_filtfilt (line 206) | def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
function output_to_target (line 219) | def output_to_target(output, max_det=300):
function plot_images (line 230) | def plot_images(images, targets, paths=None, fname='images.jpg', names=N...
function plot_lr_scheduler (line 294) | def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
function plot_val_txt (line 311) | def plot_val_txt(): # from utils.plots import *; plot_val()
function plot_targets_txt (line 328) | def plot_targets_txt(): # from utils.plots import *; plot_targets_txt()
function plot_val_study (line 341) | def plot_val_study(file='', dir='', x=None): # from utils.plots import ...
function plot_labels (line 387) | def plot_labels(labels, names=(), save_dir=Path('')):
function imshow_cls (line 432) | def imshow_cls(im, labels=None, pred=None, names=None, nmax=25, verbose=...
function plot_evolve (line 461) | def plot_evolve(evolve_csv='path/to/evolve.csv'): # from utils.plots im...
function plot_results (line 488) | def plot_results(file='path/to/results.csv', dir=''):
function profile_idetection (line 514) | def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
function save_one_box (line 545) | def save_one_box(xyxy, im, file=Path('im.jpg'), gain=1.02, pad=10, squar...
FILE: yolo-improve/yolov5-AUX/utils/segment/augmentations.py
function mixup (line 16) | def mixup(im, labels, segments, im2, labels2, segments2):
function random_perspective (line 25) | def random_perspective(im,
FILE: yolo-improve/yolov5-AUX/utils/segment/dataloaders.py
function create_dataloader (line 23) | def create_dataloader(path,
class LoadImagesAndLabelsAndMasks (line 82) | class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/...
method __init__ (line 84) | def __init__(
method __getitem__ (line 107) | def __getitem__(self, index):
method load_mosaic (line 208) | def load_mosaic(self, index):
method collate_fn (line 267) | def collate_fn(batch):
function polygon2mask (line 275) | def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
function polygons2masks (line 295) | def polygons2masks(img_size, polygons, color, downsample_ratio=1):
function polygons2masks_overlap (line 310) | def polygons2masks_overlap(img_size, segments, downsample_ratio=1):
FILE: yolo-improve/yolov5-AUX/utils/segment/general.py
function crop_mask (line 7) | def crop_mask(masks, boxes):
function process_mask_upsample (line 25) | def process_mask_upsample(protos, masks_in, bboxes, shape):
function process_mask (line 43) | def process_mask(protos, masks_in, bboxes, shape, upsample=False):
function process_mask_native (line 70) | def process_mask_native(protos, masks_in, bboxes, shape):
function scale_image (line 93) | def scale_image(im1_shape, masks, im0_shape, ratio_pad=None):
function mask_iou (line 121) | def mask_iou(mask1, mask2, eps=1e-7):
function masks_iou (line 134) | def masks_iou(mask1, mask2, eps=1e-7):
function masks2segments (line 147) | def masks2segments(masks, strategy='largest'):
FILE: yolo-improve/yolov5-AUX/utils/segment/loss.py
class ComputeLoss (line 12) | class ComputeLoss:
method __init__ (line 14) | def __init__(self, model, autobalance=False, overlap=False):
method __call__ (line 44) | def __call__(self, preds, targets, masks): # predictions, targets, model
method single_mask_loss (line 112) | def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
method build_targets (line 118) | def build_targets(self, p, targets):
FILE: yolo-improve/yolov5-AUX/utils/segment/metrics.py
function fitness (line 11) | def fitness(x):
function ap_per_class_box_and_mask (line 17) | def ap_per_class_box_and_mask(
class Metric (line 66) | class Metric:
method __init__ (line 68) | def __init__(self) -> None:
method ap50 (line 76) | def ap50(self):
method ap (line 84) | def ap(self):
method mp (line 92) | def mp(self):
method mr (line 100) | def mr(self):
method map50 (line 108) | def map50(self):
method map (line 116) | def map(self):
method mean_results (line 123) | def mean_results(self):
method class_result (line 127) | def class_result(self, i):
method get_maps (line 131) | def get_maps(self, nc):
method update (line 137) | def update(self, results):
class Metrics (line 150) | class Metrics:
method __init__ (line 153) | def __init__(self) -> None:
method update (line 157) | def update(self, results):
method mean_results (line 165) | def mean_results(self):
method class_result (line 168) | def class_result(self, i):
method get_maps (line 171) | def get_maps(self, nc):
method ap_class_index (line 175) | def ap_class_index(self):
FILE: yolo-improve/yolov5-AUX/utils/segment/plots.py
function plot_images_and_masks (line 17) | def plot_images_and_masks(images, targets, masks, paths=None, fname='ima...
function plot_results_with_masks (line 111) | def plot_results_with_masks(file='path/to/results.csv', dir='', best=True):
FILE: yolo-improve/yolov5-AUX/utils/torch_utils.py
function smart_inference_mode (line 38) | def smart_inference_mode(torch_1_9=check_version(torch.__version__, '1.9...
function smartCrossEntropyLoss (line 46) | def smartCrossEntropyLoss(label_smoothing=0.0):
function smart_DDP (line 55) | def smart_DDP(model):
function reshape_classifier_output (line 66) | def reshape_classifier_output(model, n=1000):
function torch_distributed_zero_first (line 89) | def torch_distributed_zero_first(local_rank: int):
function device_count (line 98) | def device_count():
function select_device (line 108) | def select_device(device='', batch_size=0, newline=True):
function time_sync (line 144) | def time_sync():
function profile (line 151) | def profile(input, ops, n=10, device=None):
function is_parallel (line 202) | def is_parallel(model):
function de_parallel (line 207) | def de_parallel(model):
function initialize_weights (line 212) | def initialize_weights(model):
function find_modules (line 224) | def find_modules(model, mclass=nn.Conv2d):
function sparsity (line 229) | def sparsity(model):
function prune (line 238) | def prune(model, amount=0.3):
function fuse_conv_and_bn (line 248) | def fuse_conv_and_bn(conv, bn):
function model_info (line 272) | def model_info(model, verbose=False, imgsz=640):
function scale_img (line 297) | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,...
function copy_attr (line 309) | def copy_attr(a, b, include=(), exclude=()):
function smart_optimizer (line 318) | def smart_optimizer(model, name='Adam', lr=0.001, momentum=0.9, decay=1e...
function smart_hub_load (line 349) | def smart_hub_load(repo='ultralytics/yolov5', model='yolov5s', **kwargs):
function smart_resume (line 361) | def smart_resume(ckpt, optimizer, ema=None, weights='yolov5s.pt', epochs...
class EarlyStopping (line 381) | class EarlyStopping:
method __init__ (line 383) | def __init__(self, patience=30):
method __call__ (line 389) | def __call__(self, epoch, fitness):
class ModelEMA (line 404) | class ModelEMA:
method __init__ (line 410) | def __init__(self, model, decay=0.9999, tau=2000, updates=0):
method update (line 418) | def update(self, model):
method update_attr (line 430) | def update_attr(self, model, include=(), exclude=('process_group', 're...
FILE: yolo-improve/yolov5-AUX/utils/triton.py
class TritonRemoteModel (line 11) | class TritonRemoteModel:
method __init__ (line 17) | def __init__(self, url: str):
method runtime (line 51) | def runtime(self):
method __call__ (line 55) | def __call__(self, *args, **kwargs) -> typing.Union[torch.Tensor, typi...
method _create_inputs (line 68) | def _create_inputs(self, *args, **kwargs):
FILE: yolo-improve/yolov5-AUX/val.py
function save_one_txt (line 50) | def save_one_txt(predn, save_conf, shape, file):
function save_one_json (line 60) | def save_one_json(predn, jdict, path, class_map):
function process_batch (line 73) | def process_batch(detections, labels, iouv):
function run (line 99) | def run(
function parse_opt (line 340) | def parse_opt():
function main (line 372) | def main(opt):
FILE: yolo-improve/yolov5-C3RFEM.py
class TridentBlock (line 1) | class TridentBlock(nn.Module):
method __init__ (line 2) | def __init__(self, c1, c2, stride=1, c=False, e=0.5, padding=[1, 2, 3]...
method forward_for_small (line 28) | def forward_for_small(self, x):
method forward_for_middle (line 42) | def forward_for_middle(self, x):
method forward_for_big (line 56) | def forward_for_big(self, x):
method forward (line 70) | def forward(self, x):
class RFEM (line 88) | class RFEM(nn.Module):
method __init__ (line 89) | def __init__(self, c1, c2, n=1, e=0.5, stride=1):
method forward (line 101) | def forward(self, x):
class C3RFEM (line 107) | class C3RFEM(C3):
method __init__ (line 109) | def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
FILE: yolo-improve/yolov5-CARAFE.py
class CARAFE (line 1) | class CARAFE(nn.Module):
method __init__ (line 2) | def __init__(self, c, k_enc=3, k_up=5, c_mid=64, scale=2):
method forward (line 25) | def forward(self, X):
FILE: yolo-improve/yolov5-CCFM.py
class RepConv (line 1) | class RepConv(nn.Module):
method __init__ (line 10) | def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False...
method forward_fuse (line 23) | def forward_fuse(self, x):
method forward (line 27) | def forward(self, x):
method get_equivalent_kernel_bias (line 32) | def get_equivalent_kernel_bias(self):
method _pad_1x1_to_3x3_tensor (line 39) | def _pad_1x1_to_3x3_tensor(self, kernel1x1):
method _fuse_bn_tensor (line 46) | def _fuse_bn_tensor(self, branch):
method fuse_convs (line 74) | def fuse_convs(self):
class RepC3 (line 100) | class RepC3(nn.Module):
method __init__ (line 103) | def __init__(self, c1, c2, n=3, e=1.0):
method forward (line 112) | def forward(self, x):
FILE: yolo-improve/yolov5-ContextAggregation.py
class ContextAggregation (line 3) | class ContextAggregation(nn.Module):
method __init__ (line 14) | def __init__(self, in_channels, reduction=1):
method init_weights (line 29) | def init_weights(self):
method forward (line 34) | def forward(self, x):
FILE: yolo-improve/yolov5-CoordConv.py
class AddCoords (line 1) | class AddCoords(nn.Module):
method __init__ (line 3) | def __init__(self, with_r=False):
method forward (line 7) | def forward(self, input_tensor):
class CoordConv (line 38) | class CoordConv(nn.Module):
method __init__ (line 40) | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,...
method forward (line 48) | def forward(self, x):
FILE: yolo-improve/yolov5-DBB.py
function transI_fusebn (line 2) | def transI_fusebn(kernel, bn):
function transII_addbranch (line 7) | def transII_addbranch(kernels, biases):
function transIII_1x1_kxk (line 10) | def transIII_1x1_kxk(k1, b1, k2, b2, groups):
function transIV_depthconcat (line 28) | def transIV_depthconcat(kernels, biases):
function transV_avg (line 31) | def transV_avg(channels, kernel_size, groups):
function transVI_multiscale (line 38) | def transVI_multiscale(kernel, target_kernel_size):
function conv_bn (line 43) | def conv_bn(in_channels, out_channels, kernel_size, stride=1, padding=0,...
class IdentityBasedConv1x1 (line 55) | class IdentityBasedConv1x1(nn.Conv2d):
method __init__ (line 56) | def __init__(self, channels, groups=1):
method forward (line 67) | def forward(self, input):
method get_actual_kernel (line 72) | def get_actual_kernel(self):
class BNAndPadLayer (line 76) | class BNAndPadLayer(nn.Module):
method __init__ (line 77) | def __init__(self,
method forward (line 88) | def forward(self, input):
method weight (line 104) | def weight(self):
method bias (line 108) | def bias(self):
method running_mean (line 112) | def running_mean(self):
method running_var (line 116) | def running_var(self):
method eps (line 120) | def eps(self):
class DiverseBranchBlock (line 124) | class DiverseBranchBlock(nn.Module):
method __init__ (line 125) | def __init__(self, in_channels, out_channels, kernel_size,
method get_equivalent_kernel_bias (line 184) | def get_equivalent_kernel_bias(self):
method switch_to_deploy (line 211) | def switch_to_deploy(self):
method forward (line 228) | def forward(self, inputs):
method init_gamma (line 239) | def init_gamma(self, gamma_value):
method single_init (line 249) | def single_init(self):
class Bottleneck_DBB (line 254) | class Bottleneck_DBB(nn.Module):
method __init__ (line 256) | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_ou...
method forward (line 263) | def forward(self, x):
class C3_DBB (line 266) | class C3_DBB(C3):
method __init__ (line 268) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
FILE: yolo-improve/yolov5-DCN.py
class DCNv2 (line 1) | class DCNv2(nn.Module):
method __init__ (line 2) | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
method forward (line 34) | def forward(self, x):
method reset_parameters (line 56) | def reset_parameters(self):
class Bottleneck_DCN (line 66) | class Bottleneck_DCN(nn.Module):
method __init__ (line 68) | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_ou...
method forward (line 75) | def forward(self, x):
class C3_DCN (line 78) | class C3_DCN(C3):
method __init__ (line 80) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
FILE: yolo-improve/yolov5-DCNV3/commod.py
class DCNV3_YoLo (line 2) | class DCNV3_YoLo(nn.Module):
method __init__ (line 3) | def __init__(self, inc, ouc, k=1, s=1, p=None, g=1, d=1, act=True):
method forward (line 11) | def forward(self, x):
class Bottleneck_DCNV3 (line 19) | class Bottleneck_DCNV3(nn.Module):
method __init__ (line 21) | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_ou...
method forward (line 28) | def forward(self, x):
class C3_DCNV3 (line 31) | class C3_DCNV3(nn.Module):
method __init__ (line 33) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
method forward (line 41) | def forward(self, x):
FILE: yolo-improve/yolov5-DCNV3/ops_dcnv3/functions/dcnv3_func.py
class DCNv3Function (line 19) | class DCNv3Function(Function):
method forward (line 22) | def forward(
method backward (line 51) | def backward(ctx, grad_output):
method symbolic (line 64) | def symbolic(g, input, offset, mask, kernel_h, kernel_w, stride_h,
function _get_reference_points (line 92) | def _get_reference_points(spatial_shapes, device, kernel_h, kernel_w, di...
function _generate_dilation_grids (line 123) | def _generate_dilation_grids(spatial_shapes, kernel_h, kernel_w, dilatio...
function dcnv3_core_pytorch (line 148) | def dcnv3_core_pytorch(
FILE: yolo-improve/yolov5-DCNV3/ops_dcnv3/modules/dcnv3.py
function autopad (line 17) | def autopad(k, p=None, d=1): # kernel, padding, dilation
class Conv (line 26) | class Conv(nn.Module):
method __init__ (line 30) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
method forward (line 36) | def forward(self, x):
method forward_fuse (line 39) | def forward_fuse(self, x):
function _is_power_of_2 (line 42) | def _is_power_of_2(n):
class DCNv3 (line 50) | class DCNv3(nn.Module):
method __init__ (line 51) | def __init__(
method _reset_parameters (line 99) | def _reset_parameters(self):
method forward (line 109) | def forward(self, input):
FILE: yolo-improve/yolov5-DCNV3/ops_dcnv3/setup.py
function get_extensions (line 22) | def get_extensions():
FILE: yolo-improve/yolov5-DCNV3/ops_dcnv3/src/cpu/dcnv3_cpu.cpp
function dcnv3_cpu_forward (line 17) | at::Tensor dcnv3_cpu_forward(const at::Tensor &input, const at::Tensor &...
function dcnv3_cpu_backward (line 28) | std::vector<at::Tensor>
FILE: yolo-improve/yolov5-DCNV3/ops_dcnv3/src/vision.cpp
function PYBIND11_MODULE (line 14) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: yolo-improve/yolov5-DCNV3/ops_dcnv3/test.py
function check_forward_equal_with_pytorch_double (line 34) | def check_forward_equal_with_pytorch_double():
function check_forward_equal_with_pytorch_float (line 64) | def check_forward_equal_with_pytorch_float():
function check_backward_equal_with_pytorch_double (line 93) | def check_backward_equal_with_pytorch_double(channels=4, grad_input=True...
function check_backward_equal_with_pytorch_float (line 156) | def check_backward_equal_with_pytorch_float(channels=4, grad_input=True,...
function check_time_cost (line 220) | def check_time_cost(im2col_step=128):
FILE: yolo-improve/yolov5-DSConv.py
class DSConv (line 5) | class DSConv(_ConvNd):
method __init__ (line 6) | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
method get_weight_res (line 34) | def get_weight_res(self):
method forward (line 74) | def forward(self, input):
class DSConv2D (line 83) | class DSConv2D(Conv):
method __init__ (line 84) | def __init__(self, inc, ouc, k=1, s=1, p=None, g=1, d=1, act=True):
class Bottleneck_DSConv (line 88) | class Bottleneck_DSConv(nn.Module):
method __init__ (line 90) | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_ou...
method forward (line 97) | def forward(self, x):
class C3_DSConv (line 100) | class C3_DSConv(C3):
method __init__ (line 102) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
FILE: yolo-improve/yolov5-DecoupledHead.py
class Decoupled_Detect (line 1) | class Decoupled_Detect(nn.Module):
method __init__ (line 7) | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detecti...
method forward (line 25) | def forward(self, x):
method _make_grid (line 55) | def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch...
function _initialize_biases (line 65) | def _initialize_biases(self, cf=None): # initialize biases into Detect(...
FILE: yolo-improve/yolov5-DySnakeConv.py
class DySnakeConv (line 4) | class DySnakeConv(nn.Module):
method __init__ (line 5) | def __init__(self, inc, ouc, k=3, act=True) -> None:
method forward (line 13) | def forward(self, x):
class DSConv (line 16) | class DSConv(nn.Module):
method __init__ (line 17) | def __init__(self, in_ch, out_ch, morph, kernel_size=3, if_offset=True...
method forward (line 57) | def forward(self, f):
class DSC (line 78) | class DSC(object):
method __init__ (line 79) | def __init__(self, input_shape, kernel_size, extend_scope, morph):
method _coordinate_map_3D (line 99) | def _coordinate_map_3D(self, offset, if_offset):
method _bilinear_interpolate_3D (line 244) | def _bilinear_interpolate_3D(self, input_feature, y, x):
method deform_conv (line 344) | def deform_conv(self, input, offset, if_offset):
class Bottleneck_DySnake (line 351) | class Bottleneck_DySnake(nn.Module):
method __init__ (line 353) | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_ou...
method forward (line 360) | def forward(self, x):
class C3_DySnake (line 363) | class C3_DySnake(C3):
method __init__ (line 365) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
FILE: yolo-improve/yolov5-EVC.py
class Encoding (line 5) | class Encoding(nn.Module):
method __init__ (line 6) | def __init__(self, in_channels, num_codes):
method scaled_l2 (line 19) | def scaled_l2(x, codewords, scale):
method aggregate (line 35) | def aggregate(assignment_weights, x, codewords):
method forward (line 52) | def forward(self, x):
class Mlp (line 67) | class Mlp(nn.Module):
method __init__ (line 71) | def __init__(self, in_features, hidden_features=None,
method _init_weights (line 82) | def _init_weights(self, m):
method forward (line 88) | def forward(self, x):
class ConvBlock (line 97) | class ConvBlock(nn.Module):
method __init__ (line 98) | def __init__(self, in_channels, out_channels, stride=1, res_conv=False...
method zero_init_last_bn (line 116) | def zero_init_last_bn(self):
method forward (line 119) | def forward(self, x, return_x_2=True):
class Mean (line 140) | class Mean(nn.Module):
method __init__ (line 141) | def __init__(self, dim, keep_dim=False):
method forward (line 146) | def forward(self, input):
class LVCBlock (line 149) | class LVCBlock(nn.Module):
method __init__ (line 150) | def __init__(self, in_channels, out_channels, num_codes, channel_ratio...
method forward (line 166) | def forward(self, x):
class GroupNorm (line 175) | class GroupNorm(nn.GroupNorm):
method __init__ (line 180) | def __init__(self, num_channels, **kwargs):
class DWConv_LMLP (line 183) | class DWConv_LMLP(nn.Module):
method __init__ (line 185) | def __init__(self, in_channels, out_channels, ksize, stride=1, act="si...
method forward (line 198) | def forward(self, x):
class LightMLPBlock (line 203) | class LightMLPBlock(nn.Module):
method __init__ (line 204) | def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="...
method forward (line 229) | def forward(self, x):
class EVCBlock (line 240) | class EVCBlock(nn.Module):
method __init__ (line 241) | def __init__(self, in_channels, out_channels, channel_ratio=4, base_ch...
method forward (line 256) | def forward(self, x):
FILE: yolo-improve/yolov5-FasterBlock.py
class Partial_conv3 (line 2) | class Partial_conv3(nn.Module):
method __init__ (line 3) | def __init__(self, dim, n_div, forward):
method forward_slicing (line 16) | def forward_slicing(self, x):
method forward_split_cat (line 22) | def forward_split_cat(self, x):
class Faster_Block (line 29) | class Faster_Block(nn.Module):
method __init__ (line 30) | def __init__(self,
method forward (line 70) | def forward(self, x):
method forward_layer_scale (line 78) | def forward_layer_scale(self, x):
class C3_Faster (line 85) | class C3_Faster(C3):
method __init__ (line 87) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
FILE: yolo-improve/yolov5-GFPN/extra_modules.py
function conv_bn (line 5) | def conv_bn(in_channels, out_channels, kernel_size, stride, padding, gro...
class RepConv (line 20) | class RepConv(nn.Module):
method __init__ (line 24) | def __init__(self,
method forward (line 78) | def forward(self, inputs):
method get_equivalent_kernel_bias (line 91) | def get_equivalent_kernel_bias(self):
method _pad_1x1_to_3x3_tensor (line 98) | def _pad_1x1_to_3x3_tensor(self, kernel1x1):
method _fuse_bn_tensor (line 104) | def _fuse_bn_tensor(self, branch):
method switch_to_deploy (line 134) | def switch_to_deploy(self):
class Swish (line 159) | class Swish(nn.Module):
method __init__ (line 160) | def __init__(self, inplace=True):
method forward (line 164) | def forward(self, x):
function get_activation (line 171) | def get_activation(name='silu', inplace=True):
function get_norm (line 198) | def get_norm(name, out_channels, inplace=True):
class ConvBNAct (line 205) | class ConvBNAct(nn.Module):
method __init__ (line 207) | def __init__(
method forward (line 238) | def forward(self, x):
method fuseforward (line 246) | def fuseforward(self, x):
class BasicBlock_3x3_Reverse (line 249) | class BasicBlock_3x3_Reverse(nn.Module):
method __init__ (line 250) | def __init__(self,
method forward (line 263) | def forward(self, x):
class SPP (line 271) | class SPP(nn.Module):
method __init__ (line 272) | def __init__(
method forward (line 291) | def forward(self, x):
class CSPStage (line 301) | class CSPStage(nn.Module):
method __init__ (line 302) | def __init__(self,
method forward (line 337) | def forward(self, x):
FILE: yolo-improve/yolov5-GOLDYOLO/common.py
function conv_bn (line 3) | def conv_bn(in_channels, out_channels, kernel_size, stride, padding, gro...
class RepVGGBlock (line 12) | class RepVGGBlock(nn.Module):
method __init__ (line 17) | def __init__(self, in_channels, out_channels, kernel_size=3,
method forward (line 66) | def forward(self, inputs):
method get_equivalent_kernel_bias (line 78) | def get_equivalent_kernel_bias(self):
method _pad_1x1_to_3x3_tensor (line 84) | def _pad_1x1_to_3x3_tensor(self, kernel1x1):
method _fuse_bn_tensor (line 90) | def _fuse_bn_tensor(self, branch):
method switch_to_deploy (line 118) | def switch_to_deploy(self):
function onnx_AdaptiveAvgPool2d (line 139) | def onnx_AdaptiveAvgPool2d(x, output_size):
function get_avg_pool (line 146) | def get_avg_pool():
class SimFusion_3in (line 153) | class SimFusion_3in(nn.Module):
method __init__ (line 154) | def __init__(self, in_channel_list, out_channels):
method forward (line 162) | def forward(self, x):
class SimFusion_4in (line 175) | class SimFusion_4in(nn.Module):
method __init__ (line 176) | def __init__(self):
method forward (line 180) | def forward(self, x):
class IFM (line 195) | class IFM(nn.Module):
method __init__ (line 196) | def __init__(self, inc, ouc, embed_dim_p=96, fuse_block_num=3) -> None:
method forward (line 205) | def forward(self, x):
class h_sigmoid (line 208) | class h_sigmoid(nn.Module):
method __init__ (line 209) | def __init__(self, inplace=True):
method forward (line 213) | def forward(self, x):
class InjectionMultiSum_Auto_pool (line 216) | class InjectionMultiSum_Auto_pool(nn.Module):
method __init__ (line 217) | def __init__(
method forward (line 232) | def forward(self, x):
function get_shape (line 263) | def get_shape(tensor):
class PyramidPoolAgg (line 269) | class PyramidPoolAgg(nn.Module):
method __init__ (line 270) | def __init__(self, inc, ouc, stride, pool_mode='torch'):
method forward (line 279) | def forward(self, inputs):
function drop_path (line 296) | def drop_path(x, drop_prob: float = 0., training: bool = False):
class Mlp (line 313) | class Mlp(nn.Module):
method __init__ (line 314) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 324) | def forward(self, x):
class DropPath (line 333) | class DropPath(nn.Module):
method __init__ (line 337) | def __init__(self, drop_prob=None):
method forward (line 341) | def forward(self, x):
class Attention (line 344) | class Attention(torch.nn.Module):
method __init__ (line 345) | def __init__(self, dim, key_dim, num_heads, attn_ratio=4):
method forward (line 361) | def forward(self, x): # x (B,N,C)
class top_Block (line 377) | class top_Block(nn.Module):
method __init__ (line 379) | def __init__(self, dim, key_dim, num_heads, mlp_ratio=4., attn_ratio=2...
method forward (line 393) | def forward(self, x1):
class TopBasicLayer (line 398) | class TopBasicLayer(nn.Module):
method __init__ (line 399) | def __init__(self, embedding_dim, ouc_list, block_num=2, key_dim=8, nu...
method forward (line 412) | def forward(self, x):
class AdvPoolFusion (line 418) | class AdvPoolFusion(nn.Module):
method forward (line 419) | def forward(self, x):
FILE: yolo-improve/yolov5-NWD.py
function wasserstein_loss (line 1) | def wasserstein_loss(pred, target, eps=1e-7, constant=12.8):
FILE: yolo-improve/yolov5-OTA/loss.py
class ComputeLossOTA (line 6) | class ComputeLossOTA:
method __init__ (line 8) | def __init__(self, model, autobalance=False):
method __call__ (line 32) | def __call__(self, p, targets, imgs): # predictions, targets, model
method build_targets (line 92) | def build_targets(self, p, targets, imgs):
method find_3_positive (line 244) | def find_3_positive(self, p, targets):
FILE: yolo-improve/yolov5-RepNCSPELAN.py
class RepConvN (line 1) | class RepConvN(nn.Module):
method __init__ (line 7) | def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False...
method forward_fuse (line 19) | def forward_fuse(self, x):
method forward (line 23) | def forward(self, x):
method get_equivalent_kernel_bias (line 28) | def get_equivalent_kernel_bias(self):
method _avg_to_3x3_tensor (line 34) | def _avg_to_3x3_tensor(self, avgp):
method _pad_1x1_to_3x3_tensor (line 43) | def _pad_1x1_to_3x3_tensor(self, kernel1x1):
method _fuse_bn_tensor (line 49) | def _fuse_bn_tensor(self, branch):
method fuse_convs (line 76) | def fuse_convs(self):
class RepNBottleneck (line 101) | class RepNBottleneck(nn.Module):
method __init__ (line 103) | def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch...
method forward (line 110) | def forward(self, x):
class RepNCSP (line 113) | class RepNCSP(nn.Module):
method __init__ (line 115) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
method forward (line 123) | def forward(self, x):
class RepNCSPELAN4 (line 126) | class RepNCSPELAN4(nn.Module):
method __init__ (line 128) | def __init__(self, c1, c2, c3, c4, c5=1): # ch_in, ch_out, number, sh...
method forward (line 136) | def forward(self, x):
method forward_split (line 141) | def forward_split(self, x):
FILE: yolo-improve/yolov5-SAConv.py
class ConvAWS2d (line 1) | class ConvAWS2d(nn.Conv2d):
method __init__ (line 2) | def __init__(self,
method _get_weight (line 23) | def _get_weight(self, weight):
method forward (line 32) | def forward(self, x):
method _load_from_state_dict (line 36) | def _load_from_state_dict(self, state_dict, prefix, local_metadata, st...
class SAConv2d (line 50) | class SAConv2d(ConvAWS2d):
method __init__ (line 51) | def __init__(self,
method forward (line 98) | def forward(self, x):
class Bottleneck_SAC (line 127) | class Bottleneck_SAC(nn.Module):
method __init__ (line 129) | def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_ou...
method forward (line 136) | def forward(self, x):
class C3_SAC (line 139) | class C3_SAC(C3):
method __init__ (line 141) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
FILE: yolo-improve/yolov5-TSCODE.py
class TSCODE_Detect (line 3) | class TSCODE_Detect(nn.Module):
method __init__ (line 9) | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detecti...
method forward (line 29) | def forward(self, x_):
method _make_grid (line 61) | def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch...
class Decoupled_Detect (line 71) | class Decoupled_Detect(nn.Module):
method __init__ (line 77) | def __init__(self, nc=80, anchors=(), ch=(), inplace=True): # detecti...
method forward (line 95) | def forward(self, x):
method _make_grid (line 125) | def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch...
function _initialize_biases (line 135) | def _initialize_biases(self, cf=None): # initialize biases into Detect(...
class SCE (line 159) | class SCE(nn.Module):
method __init__ (line 160) | def __init__(self, c1):
method forward (line 164) | def forward(self, x):
class DPE (line 169) | class DPE(nn.Module):
method __init__ (line 170) | def __init__(self, c1, c2):
method forward (line 186) | def forward(self, x):
FILE: yolo-improve/yolov5-aLRPLoss.py
class aLRPLoss (line 1) | class aLRPLoss(torch.autograd.Function):
method forward (line 3) | def forward(ctx, logits, targets, regression_losses, delta=1., eps=1e-5):
method backward (line 66) | def backward(ctx, out_grad1, out_grad2, out_grad3):
function __call__ (line 79) | def __call__(self, p, targets): # predictions, targets
FILE: yolo-improve/yolov5-asf.py
class Zoom_cat (line 3) | class Zoom_cat(nn.Module):
method __init__ (line 4) | def __init__(self):
method forward (line 7) | def forward(self, x):
class ScalSeq (line 16) | class ScalSeq(nn.Module):
method __init__ (line 17) | def __init__(self, inc, channel):
method forward (line 26) | def forward(self, x):
class Add (line 43) | class Add(nn.Module):
method __init__ (line 45) | def __init__(self):
method forward (line 48) | def forward(self, x):
class channel_att (line 53) | class channel_att(nn.Module):
method __init__ (line 54) | def __init__(self, channel, b=1, gamma=2):
method forward (line 63) | def forward(self, x):
class local_att (line 71) | class local_att(nn.Module):
method __init__ (line 72) | def __init__(self, channel, reduction=16):
method forward (line 86) | def forward(self, x):
class attention_model (line 102) | class attention_model(nn.Module):
method __init__ (line 104) | def __init__(self, ch = 256):
method forward (line 108) | def forward(self, x):
FILE: yolo-improve/yolov5-backbone/CVPR2023-EfficientViT/EfficientViT.py
class Conv2d_BN (line 19) | class Conv2d_BN(torch.nn.Sequential):
method __init__ (line 20) | def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1,
method fuse (line 30) | def fuse(self):
function replace_batchnorm (line 42) | def replace_batchnorm(net):
class PatchMerging (line 52) | class PatchMerging(torch.nn.Module):
method __init__ (line 53) | def __init__(self, dim, out_dim, input_resolution):
method forward (line 62) | def forward(self, x):
class Residual (line 67) | class Residual(torch.nn.Module):
method __init__ (line 68) | def __init__(self, m, drop=0.):
method forward (line 73) | def forward(self, x):
class FFN (line 81) | class FFN(torch.nn.Module):
method __init__ (line 82) | def __init__(self, ed, h, resolution):
method forward (line 88) | def forward(self, x):
class CascadedGroupAttention (line 93) | class CascadedGroupAttention(torch.nn.Module):
method __init__ (line 104) | def __init__(self, dim, key_dim, num_heads=8,
method train (line 141) | def train(self, mode=True):
method forward (line 148) | def forward(self, x): # x (B,C,H,W)
class LocalWindowAttention (line 173) | class LocalWindowAttention(torch.nn.Module):
method __init__ (line 185) | def __init__(self, dim, key_dim, num_heads=8,
method forward (line 202) | def forward(self, x):
class EfficientViTBlock (line 238) | class EfficientViTBlock(torch.nn.Module):
method __init__ (line 251) | def __init__(self, type,
method forward (line 269) | def forward(self, x):
class EfficientViT (line 273) | class EfficientViT(torch.nn.Module):
method __init__ (line 274) | def __init__(self, img_size=400,
method forward (line 321) | def forward(self, x):
function EfficientViT_M0 (line 392) | def EfficientViT_M0(pretrained='', frozen_stages=0, distillation=False, ...
function EfficientViT_M1 (line 400) | def EfficientViT_M1(pretrained='', frozen_stages=0, distillation=False, ...
function EfficientViT_M2 (line 408) | def EfficientViT_M2(pretrained='', frozen_stages=0, distillation=False, ...
function EfficientViT_M3 (line 416) | def EfficientViT_M3(pretrained='', frozen_stages=0, distillation=False, ...
function EfficientViT_M4 (line 424) | def EfficientViT_M4(pretrained='', frozen_stages=0, distillation=False, ...
function EfficientViT_M5 (line 432) | def EfficientViT_M5(pretrained='', frozen_stages=0, distillation=False, ...
function update_weight (line 440) | def update_weight(model_dict, weight_dict):
FILE: yolo-improve/yolov5-backbone/CVPR2024-StarNet/starnet.py
class ConvBN (line 26) | class ConvBN(torch.nn.Sequential):
method __init__ (line 27) | def __init__(self, in_planes, out_planes, kernel_size=1, stride=1, pad...
class Block (line 36) | class Block(nn.Module):
method __init__ (line 37) | def __init__(self, dim, mlp_ratio=3, drop_path=0.):
method forward (line 47) | def forward(self, x):
class StarNet (line 57) | class StarNet(nn.Module):
method __init__ (line 58) | def __init__(self, base_dim=32, depths=[3, 3, 12, 5], mlp_ratio=4, dro...
method _init_weights (line 79) | def _init_weights(self, m):
method forward (line 88) | def forward(self, x):
function starnet_s1 (line 99) | def starnet_s1(pretrained=False, **kwargs):
function starnet_s2 (line 109) | def starnet_s2(pretrained=False, **kwargs):
function starnet_s3 (line 119) | def starnet_s3(pretrained=False, **kwargs):
function starnet_s4 (line 129) | def starnet_s4(pretrained=False, **kwargs):
function starnet_s050 (line 140) | def starnet_s050(pretrained=False, **kwargs):
function starnet_s100 (line 145) | def starnet_s100(pretrained=False, **kwargs):
function starnet_s150 (line 150) | def starnet_s150(pretrained=False, **kwargs):
FILE: yolo-improve/yolov5-backbone/ConvNextV2/convnextv2.py
class LayerNorm (line 16) | class LayerNorm(nn.Module):
method __init__ (line 22) | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_l...
method forward (line 32) | def forward(self, x):
class GRN (line 42) | class GRN(nn.Module):
method __init__ (line 45) | def __init__(self, dim):
method forward (line 50) | def forward(self, x):
class Block (line 55) | class Block(nn.Module):
method __init__ (line 62) | def __init__(self, dim, drop_path=0.):
method forward (line 72) | def forward(self, x):
class ConvNeXtV2 (line 86) | class ConvNeXtV2(nn.Module):
method __init__ (line 97) | def __init__(self, in_chans=3, num_classes=1000,
method _init_weights (line 132) | def _init_weights(self, m):
method forward (line 137) | def forward(self, x):
function update_weight (line 145) | def update_weight(model_dict, weight_dict):
function convnextv2_atto (line 155) | def convnextv2_atto(weights='', **kwargs):
function convnextv2_femto (line 161) | def convnextv2_femto(weights='', **kwargs):
function convnextv2_pico (line 167) | def convnextv2_pico(weights='', **kwargs):
function convnextv2_nano (line 173) | def convnextv2_nano(weights='', **kwargs):
function convnextv2_tiny (line 179) | def convnextv2_tiny(weights='', **kwargs):
function convnextv2_base (line 185) | def convnextv2_base(weights='', **kwargs):
function convnextv2_large (line 191) | def convnextv2_large(weights='', **kwargs):
function convnextv2_huge (line 197) | def convnextv2_huge(weights='', **kwargs):
FILE: yolo-improve/yolov5-backbone/EMO/emo.py
function get_act (line 14) | def get_act(act_layer='relu'):
class LayerNorm2d (line 32) | class LayerNorm2d(nn.Module):
method __init__ (line 34) | def __init__(self, normalized_shape, eps=1e-6, elementwise_affine=True):
method forward (line 38) | def forward(self, x):
function get_norm (line 44) | def get_norm(norm_layer='in_1d'):
class ConvNormAct (line 60) | class ConvNormAct(nn.Module):
method __init__ (line 62) | def __init__(self, dim_in, dim_out, kernel_size, stride=1, dilation=1,...
method forward (line 72) | def forward(self, x):
class MSPatchEmb (line 84) | class MSPatchEmb(nn.Module):
method __init__ (line 86) | def __init__(self, dim_in, emb_dim, kernel_size=2, c_group=-1, stride=...
method forward (line 99) | def forward(self, x):
class iRMB (line 108) | class iRMB(nn.Module):
method __init__ (line 109) | def __init__(self, dim_in, dim_out, norm_in=True, has_skip=True, exp_r...
method forward (line 139) | def forward(self, x):
class EMO (line 189) | class EMO(nn.Module):
method __init__ (line 190) | def __init__(self, dim_in=3, num_classes=1000, img_size=224,
method _init_weights (line 237) | def _init_weights(self, m):
method no_weight_decay (line 249) | def no_weight_decay(self):
method no_weight_decay_keywords (line 253) | def no_weight_decay_keywords(self):
method no_ft_keywords (line 257) | def no_ft_keywords(self):
method ft_head_keywords (line 262) | def ft_head_keywords(self):
method get_classifier (line 265) | def get_classifier(self):
method reset_classifier (line 268) | def reset_classifier(self, num_classes):
method check_bn (line 272) | def check_bn(self):
method forward_features (line 278) | def forward_features(self, x):
method forward (line 296) | def forward(self, x):
function update_weight (line 301) | def update_weight(model_dict, weight_dict):
function EMO_1M (line 311) | def EMO_1M(weights='', **kwargs):
function EMO_2M (line 324) | def EMO_2M(weights='', **kwargs):
function EMO_5M (line 337) | def EMO_5M(weights='', **kwargs):
function EMO_6M (line 350) | def EMO_6M(weights='', **kwargs):
FILE: yolo-improve/yolov5-backbone/EfficientFormerV2/EfficientFormerV2.py
class Attention4D (line 64) | class Attention4D(torch.nn.Module):
method __init__ (line 65) | def __init__(self, dim=384, key_dim=32, num_heads=8,
method train (line 125) | def train(self, mode=True):
method forward (line 132) | def forward(self, x): # x (B,N,C)
function stem (line 164) | def stem(in_chs, out_chs, act_layer=nn.ReLU):
class LGQuery (line 175) | class LGQuery(torch.nn.Module):
method __init__ (line 176) | def __init__(self, in_dim, out_dim, resolution1, resolution2):
method forward (line 186) | def forward(self, x):
class Attention4DDownsample (line 194) | class Attention4DDownsample(torch.nn.Module):
method __init__ (line 195) | def __init__(self, dim=384, key_dim=16, num_heads=8,
method train (line 261) | def train(self, mode=True):
method forward (line 268) | def forward(self, x): # x (B,N,C)
class Embedding (line 293) | class Embedding(nn.Module):
method __init__ (line 294) | def __init__(self, patch_size=3, stride=2, padding=1,
method forward (line 330) | def forward(self, x):
class Mlp (line 343) | class Mlp(nn.Module):
method __init__ (line 349) | def __init__(self, in_features, hidden_features=None,
method _init_weights (line 369) | def _init_weights(self, m):
method forward (line 375) | def forward(self, x):
class AttnFFN (line 393) | class AttnFFN(nn.Module):
method __init__ (line 394) | def __init__(self, dim, mlp_ratio=4.,
method forward (line 416) | def forward(self, x):
class FFN (line 427) | class FFN(nn.Module):
method __init__ (line 428) | def __init__(self, dim, pool_size=3, mlp_ratio=4.,
method forward (line 445) | def forward(self, x):
function eformer_block (line 453) | def eformer_block(dim, index, layers,
class EfficientFormerV2 (line 489) | class EfficientFormerV2(nn.Module):
method __init__ (line 490) | def __init__(self, layers, embed_dims=None,
method forward_tokens (line 557) | def forward_tokens(self, x):
method forward (line 567) | def forward(self, x):
function update_weight (line 572) | def update_weight(model_dict, weight_dict):
function efficientformerv2_s0 (line 582) | def efficientformerv2_s0(weights='', **kwargs):
function efficientformerv2_s1 (line 596) | def efficientformerv2_s1(weights='', **kwargs):
function efficientformerv2_s2 (line 610) | def efficientformerv2_s2(weights='', **kwargs):
function efficientformerv2_l (line 624) | def efficientformerv2_l(weights='', **kwargs):
FILE: yolo-improve/yolov5-backbone/EfficientViT/efficientViT.py
function build_kwargs_from_config (line 19) | def build_kwargs_from_config(config: Dict, target_func: Callable) -> Dic...
function build_norm (line 32) | def build_norm(name="bn2d", num_features=None, **kwargs) -> Optional[nn....
function build_act (line 50) | def build_act(name: str, **kwargs) -> Optional[nn.Module]:
function get_same_padding (line 58) | def get_same_padding(kernel_size: Union[int, Tuple[int, ...]]) -> Union[...
function list_sum (line 65) | def list_sum(x: List) -> Any:
function merge_tensor (line 68) | def merge_tensor(x: List[torch.Tensor], mode="cat", dim=1) -> torch.Tensor:
function resize (line 76) | def resize(
function val2list (line 96) | def val2list(x: Union[List, Tuple, Any], repeat_time=1) -> List:
function val2tuple (line 101) | def val2tuple(x: Union[List, Tuple, Any], min_len: int = 1, idx_repeat: ...
class ConvLayer (line 111) | class ConvLayer(nn.Module):
method __init__ (line 112) | def __init__(
method forward (line 144) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class UpSampleLayer (line 155) | class UpSampleLayer(nn.Module):
method __init__ (line 156) | def __init__(
method forward (line 169) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class LinearLayer (line 173) | class LinearLayer(nn.Module):
method __init__ (line 174) | def __init__(
method _try_squeeze (line 190) | def _try_squeeze(self, x: torch.Tensor) -> torch.Tensor:
method forward (line 195) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class IdentityLayer (line 207) | class IdentityLayer(nn.Module):
method forward (line 208) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class DSConv (line 217) | class DSConv(nn.Module):
method __init__ (line 218) | def __init__(
method forward (line 253) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class MBConv (line 259) | class MBConv(nn.Module):
method __init__ (line 260) | def __init__(
method forward (line 307) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class LiteMSA (line 314) | class LiteMSA(nn.Module):
method __init__ (line 316) | def __init__(
method forward (line 369) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class EfficientViTBlock (line 414) | class EfficientViTBlock(nn.Module):
method __init__ (line 415) | def __init__(self, in_channels: int, heads_ratio: float = 1.0, dim=32,...
method forward (line 437) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class ResidualBlock (line 448) | class ResidualBlock(nn.Module):
method __init__ (line 449) | def __init__(
method forward_main (line 463) | def forward_main(self, x: torch.Tensor) -> torch.Tensor:
method forward (line 469) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class DAGBlock (line 481) | class DAGBlock(nn.Module):
method __init__ (line 482) | def __init__(
method forward (line 502) | def forward(self, feature_dict: Dict[str, torch.Tensor]) -> Dict[str, ...
class OpSequential (line 513) | class OpSequential(nn.Module):
method __init__ (line 514) | def __init__(self, op_list: List[Optional[nn.Module]]):
method forward (line 522) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class EfficientViTBackbone (line 527) | class EfficientViTBackbone(nn.Module):
method __init__ (line 528) | def __init__(self, width_list: List[int], depth_list: List[int], in_ch...
method build_local_block (line 605) | def build_local_block(in_channels: int, out_channels: int, stride: int...
method forward (line 627) | def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]:
function update_weight (line 636) | def update_weight(model_dict, weight_dict):
function efficientvit_b0 (line 647) | def efficientvit_b0(weights='', **kwargs) -> EfficientViTBackbone:
function efficientvit_b1 (line 659) | def efficientvit_b1(weights='', **kwargs) -> EfficientViTBackbone:
function efficientvit_b2 (line 671) | def efficientvit_b2(weights='', **kwargs) -> EfficientViTBackbone:
function efficientvit_b3 (line 683) | def efficientvit_b3(weights='', **kwargs) -> EfficientViTBackbone:
FILE: yolo-improve/yolov5-backbone/FocalNet/FocalNet.py
function update_weight (line 17) | def update_weight(model_dict, weight_dict):
class Mlp (line 27) | class Mlp(nn.Module):
method __init__ (line 28) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 37) | def forward(self, x):
class FocalModulation (line 45) | class FocalModulation(nn.Module):
method __init__ (line 46) | def __init__(self, dim, focal_window, focal_level, focal_factor=2, bia...
method forward (line 78) | def forward(self, x):
method extra_repr (line 113) | def extra_repr(self) -> str:
method flops (line 116) | def flops(self, N):
class FocalNetBlock (line 136) | class FocalNetBlock(nn.Module):
method __init__ (line 154) | def __init__(self, dim, input_resolution, mlp_ratio=4., drop=0., drop_...
method forward (line 189) | def forward(self, x):
method extra_repr (line 206) | def extra_repr(self) -> str:
method flops (line 210) | def flops(self):
class BasicLayer (line 225) | class BasicLayer(nn.Module):
method __init__ (line 248) | def __init__(self, dim, out_dim, input_resolution, depth,
method forward (line 296) | def forward(self, x, H, W):
method extra_repr (line 311) | def extra_repr(self) -> str:
method flops (line 314) | def flops(self):
class PatchEmbed (line 322) | class PatchEmbed(nn.Module):
method __init__ (line 333) | def __init__(self, img_size=(224, 224), patch_size=4, in_chans=3, embe...
method forward (line 360) | def forward(self, x):
method flops (line 370) | def flops(self):
class FocalNet (line 377) | class FocalNet(nn.Module):
method __init__ (line 400) | def __init__(self,
method _init_weights (line 481) | def _init_weights(self, m):
method no_weight_decay (line 491) | def no_weight_decay(self):
method no_weight_decay_keywords (line 495) | def no_weight_decay_keywords(self):
method forward (line 498) | def forward(self, x):
method flops (line 516) | def flops(self):
function focalnet_tiny_srf (line 540) | def focalnet_tiny_srf(pretrained=False, **kwargs):
function focalnet_small_srf (line 548) | def focalnet_small_srf(pretrained=False, **kwargs):
function focalnet_base_srf (line 556) | def focalnet_base_srf(pretrained=False, **kwargs):
function focalnet_tiny_lrf (line 564) | def focalnet_tiny_lrf(pretrained=False, **kwargs):
function focalnet_small_lrf (line 572) | def focalnet_small_lrf(pretrained=False, **kwargs):
function focalnet_base_lrf (line 580) | def focalnet_base_lrf(pretrained=False, **kwargs):
function focalnet_tiny_iso (line 588) | def focalnet_tiny_iso(pretrained=False, **kwargs):
function focalnet_small_iso (line 596) | def focalnet_small_iso(pretrained=False, **kwargs):
function focalnet_base_iso (line 604) | def focalnet_base_iso(pretrained=False, **kwargs):
function focalnet_large_fl3 (line 613) | def focalnet_large_fl3(pretrained=False, **kwargs):
function focalnet_large_fl4 (line 621) | def focalnet_large_fl4(pretrained=False, **kwargs):
function focalnet_xlarge_fl3 (line 629) | def focalnet_xlarge_fl3(pretrained=False, **kwargs):
function focalnet_xlarge_fl4 (line 637) | def focalnet_xlarge_fl4(pretrained=False, **kwargs):
function focalnet_huge_fl3 (line 645) | def focalnet_huge_fl3(pretrained=False, **kwargs):
function focalnet_huge_fl4 (line 653) | def focalnet_huge_fl4(pretrained=False, **kwargs):
FILE: yolo-improve/yolov5-backbone/LSKNet/lsknet.py
class Mlp (line 10) | class Mlp(nn.Module):
method __init__ (line 11) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 21) | def forward(self, x):
class LSKblock (line 31) | class LSKblock(nn.Module):
method __init__ (line 32) | def __init__(self, dim):
method forward (line 41) | def forward(self, x):
class Attention (line 59) | class Attention(nn.Module):
method __init__ (line 60) | def __init__(self, d_model):
method forward (line 68) | def forward(self, x):
class Block (line 78) | class Block(nn.Module):
method __init__ (line 79) | def __init__(self, dim, mlp_ratio=4., drop=0.,drop_path=0., act_layer=...
method forward (line 93) | def forward(self, x):
class OverlapPatchEmbed (line 99) | class OverlapPatchEmbed(nn.Module):
method __init__ (line 103) | def __init__(self, img_size=224, patch_size=7, stride=4, in_chans=3, e...
method forward (line 111) | def forward(self, x):
class LSKNet (line 117) | class LSKNet(nn.Module):
method __init__ (line 118) | def __init__(self, img_size=224, in_chans=3, embed_dims=[64, 128, 256,...
method forward (line 149) | def forward(self, x):
class DWConv (line 166) | class DWConv(nn.Module):
method __init__ (line 167) | def __init__(self, dim=768):
method forward (line 171) | def forward(self, x):
function update_weight (line 175) | def update_weight(model_dict, weight_dict):
function lsknet_t (line 185) | def lsknet_t(weights=''):
function lsknet_s (line 191) | def lsknet_s(weights=''):
FILE: yolo-improve/yolov5-backbone/MobileNetV4/mobilenetv4.py
function make_divisible (line 215) | def make_divisible(
function conv_2d (line 245) | def conv_2d(inp, oup, kernel_size=3, stride=1, groups=1, bias=False, nor...
class InvertedResidual (line 255) | class InvertedResidual(nn.Module):
method __init__ (line 256) | def __init__(self, inp, oup, stride, expand_ratio, act=False):
method forward (line 268) | def forward(self, x):
class UniversalInvertedBottleneckBlock (line 274) | class UniversalInvertedBottleneckBlock(nn.Module):
method __init__ (line 275) | def __init__(self,
method forward (line 306) | def forward(self, x):
function build_blocks (line 319) | def build_blocks(layer_spec):
class MobileNetV4 (line 347) | class MobileNetV4(nn.Module):
method __init__ (line 348) | def __init__(self, model):
method forward (line 376) | def forward(self, x):
function MobileNetV4ConvSmall (line 386) | def MobileNetV4ConvSmall():
function MobileNetV4ConvMedium (line 390) | def MobileNetV4ConvMedium():
function MobileNetV4ConvLarge (line 394) | def MobileNetV4ConvLarge():
function MobileNetV4HybridMedium (line 398) | def MobileNetV4HybridMedium():
function MobileNetV4HybridLarge (line 402) | def MobileNetV4HybridLarge():
FILE: yolo-improve/yolov5-backbone/NextViT/NextViT.py
class ConvBNReLU (line 14) | class ConvBNReLU(nn.Module):
method __init__ (line 15) | def __init__(
method forward (line 28) | def forward(self, x):
function _make_divisible (line 35) | def _make_divisible(v, divisor, min_value=None):
class PatchEmbed (line 45) | class PatchEmbed(nn.Module):
method __init__ (line 46) | def __init__(self,
method forward (line 65) | def forward(self, x):
class MHCA (line 69) | class MHCA(nn.Module):
method __init__ (line 73) | def __init__(self, out_channels, head_dim):
method forward (line 82) | def forward(self, x):
class Mlp (line 90) | class Mlp(nn.Module):
method __init__ (line 91) | def __init__(self, in_features, out_features=None, mlp_ratio=None, dro...
method forward (line 100) | def forward(self, x):
class NCB (line 109) | class NCB(nn.Module):
method __init__ (line 113) | def __init__(self, in_channels, out_channels, stride=1, path_dropout=0,
method forward (line 130) | def forward(self, x):
class E_MHSA (line 141) | class E_MHSA(nn.Module):
method __init__ (line 145) | def __init__(self, dim, out_dim=None, head_dim=32, qkv_bias=True, qk_s...
method forward (line 166) | def forward(self, x):
class NTB (line 197) | class NTB(nn.Module):
method __init__ (line 201) | def __init__(
method forward (line 230) | def forward(self, x):
class NextViT (line 253) | class NextViT(nn.Module):
method __init__ (line 254) | def __init__(self, stem_chs, depths, path_dropout, attn_drop=0, drop=0...
method _initialize_weights (line 310) | def _initialize_weights(self):
method forward (line 324) | def forward(self, x):
function update_weight (line 337) | def update_weight(model_dict, weight_dict):
function nextvit_small (line 347) | def nextvit_small(weights=''):
function nextvit_base (line 355) | def nextvit_base(weights=''):
function nextvit_large (line 363) | def nextvit_large(weights=''):
FILE: yolo-improve/yolov5-backbone/ODConv/od_mobilenetv2.py
function _make_divisible (line 10) | def _make_divisible(v, divisor, min_value=None):
class ConvBNReLU (line 30) | class ConvBNReLU(nn.Sequential):
method __init__ (line 31) | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, gro...
class ODConvBNReLU (line 40) | class ODConvBNReLU(nn.Sequential):
method __init__ (line 41) | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, gro...
class InvertedResidual (line 52) | class InvertedResidual(nn.Module):
method __init__ (line 53) | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.Batch...
method forward (line 75) | def forward(self, x):
class OD_MobileNetV2 (line 82) | class OD_MobileNetV2(nn.Module):
method __init__ (line 83) | def __init__(self,
method net_update_temperature (line 160) | def net_update_temperature(self, temperature):
method forward (line 165) | def forward(self, x):
function update_weight (line 175) | def update_weight(model_dict, weight_dict):
function od_mobilenetv2_050 (line 185) | def od_mobilenetv2_050(weights=None, kernel_num=1):
function od_mobilenetv2_075 (line 192) | def od_mobilenetv2_075(weights=None, kernel_num=1):
function od_mobilenetv2_100 (line 199) | def od_mobilenetv2_100(weights=None, kernel_num=1):
FILE: yolo-improve/yolov5-backbone/ODConv/od_resnet.py
function odconv3x3 (line 9) | def odconv3x3(in_planes, out_planes, stride=1, reduction=0.0625, kernel_...
function odconv1x1 (line 14) | def odconv1x1(in_planes, out_planes, stride=1, reduction=0.0625, kernel_...
class BasicBlock (line 19) | class BasicBlock(nn.Module):
method __init__ (line 22) | def __init__(self, inplanes, planes, stride=1, downsample=None, reduct...
method forward (line 32) | def forward(self, x):
class Bottleneck (line 50) | class Bottleneck(nn.Module):
method __init__ (line 53) | def __init__(self, inplanes, planes, stride=1, downsample=None, reduct...
method forward (line 65) | def forward(self, x):
class OD_ResNet (line 87) | class OD_ResNet(nn.Module):
method __init__ (line 88) | def __init__(self, block, layers, num_classes=1000, dropout=0.1, reduc...
method net_update_temperature (line 113) | def net_update_temperature(self, temperature):
method _make_layer (line 118) | def _make_layer(self, block, planes, blocks, stride=1, reduction=0.625...
method forward (line 134) | def forward(self, x):
function update_weight (line 147) | def update_weight(model_dict, weight_dict):
function od_resnet18 (line 157) | def od_resnet18(weights=None, kernel_num=1):
function od_resnet34 (line 164) | def od_resnet34(weights=None, kernel_num=1):
function od_resnet50 (line 171) | def od_resnet50(weights=None, kernel_num=1):
function od_resnet101 (line 178) | def od_resnet101(weights=None, kernel_num=1):
FILE: yolo-improve/yolov5-backbone/ODConv/odconv.py
class Attention (line 7) | class Attention(nn.Module):
method __init__ (line 8) | def __init__(self, in_planes, out_planes, kernel_size, groups=1, reduc...
method _initialize_weights (line 43) | def _initialize_weights(self):
method update_temperature (line 53) | def update_temperature(self, temperature):
method skip (line 57) | def skip(_):
method get_channel_attention (line 60) | def get_channel_attention(self, x):
method get_filter_attention (line 64) | def get_filter_attention(self, x):
method get_spatial_attention (line 68) | def get_spatial_attention(self, x):
method get_kernel_attention (line 73) | def get_kernel_attention(self, x):
method forward (line 78) | def forward(self, x):
class ODConv2d (line 86) | class ODConv2d(nn.Module):
method __init__ (line 87) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method _initialize_weights (line 109) | def _initialize_weights(self):
method update_temperature (line 113) | def update_temperature(self, temperature):
method _forward_impl_common (line 116) | def _forward_impl_common(self, x):
method _forward_impl_pw1x (line 132) | def _forward_impl_pw1x(self, x):
method forward (line 140) | def forward(self, x):
FILE: yolo-improve/yolov5-backbone/ODConvFuse/od_mobilenetv2.py
function fuse_conv_bn (line 9) | def fuse_conv_bn(conv, bn):
function _make_divisible (line 42) | def _make_divisible(v, divisor, min_value=None):
class ConvBNReLU (line 62) | class ConvBNReLU(nn.Sequential):
method __init__ (line 63) | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, gro...
method fuse (line 71) | def fuse(self):
class ODConvBNReLU (line 77) | class ODConvBNReLU(nn.Sequential):
method __init__ (line 78) | def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, gro...
class InvertedResidual (line 89) | class InvertedResidual(nn.Module):
method __init__ (line 90) | def __init__(self, inp, oup, stride, expand_ratio, norm_layer=nn.Batch...
method forward (line 112) | def forward(self, x):
class OD_MobileNetV2 (line 119) | class OD_MobileNetV2(nn.Module):
method __init__ (line 120) | def __init__(self,
method net_update_temperature (line 197) | def net_update_temperature(self, temperature):
method forward (line 202) | def forward(self, x):
function update_weight (line 212) | def update_weight(model_dict, weight_dict):
function od_mobilenetv2_050 (line 222) | def od_mobilenetv2_050(weights=None, kernel_num=1):
function od_mobilenetv2_075 (line 229) | def od_mobilenetv2_075(weights=None, kernel_num=1):
function od_mobilenetv2_100 (line 236) | def od_mobilenetv2_100(weights=None, kernel_num=1):
FILE: yolo-improve/yolov5-backbone/ODConvFuse/od_resnet.py
function odconv3x3 (line 9) | def odconv3x3(in_planes, out_planes, stride=1, reduction=0.0625, kernel_...
function odconv1x1 (line 14) | def odconv1x1(in_planes, out_planes, stride=1, reduction=0.0625, kernel_...
class BasicBlock (line 19) | class BasicBlock(nn.Module):
method __init__ (line 22) | def __init__(self, inplanes, planes, stride=1, downsample=None, reduct...
method forward (line 32) | def forward(self, x):
class Bottleneck (line 50) | class Bottleneck(nn.Module):
method __init__ (line 53) | def __init__(self, inplanes, planes, stride=1, downsample=None, reduct...
method forward (line 65) | def forward(self, x):
class OD_ResNet (line 87) | class OD_ResNet(nn.Module):
method __init__ (line 88) | def __init__(self, block, layers, num_classes=1000, dropout=0.1, reduc...
method net_update_temperature (line 113) | def net_update_temperature(self, temperature):
method _make_layer (line 118) | def _make_layer(self, block, planes, blocks, stride=1, reduction=0.625...
method forward (line 134) | def forward(self, x):
function update_weight (line 147) | def update_weight(model_dict, weight_dict):
function od_resnet18 (line 157) | def od_resnet18(weights=None, kernel_num=1):
function od_resnet34 (line 164) | def od_resnet34(weights=None, kernel_num=1):
function od_resnet50 (line 171) | def od_resnet50(weights=None, kernel_num=1):
function od_resnet101 (line 178) | def od_resnet101(weights=None, kernel_num=1):
FILE: yolo-improve/yolov5-backbone/ODConvFuse/odconv.py
function fuse_conv_bn (line 6) | def fuse_conv_bn(conv, bn):
class Attention (line 39) | class Attention(nn.Module):
method __init__ (line 40) | def __init__(self, in_planes, out_planes, kernel_size, groups=1, reduc...
method _initialize_weights (line 75) | def _initialize_weights(self):
method update_temperature (line 85) | def update_temperature(self, temperature):
method skip (line 89) | def skip(_):
method get_channel_attention (line 92) | def get_channel_attention(self, x):
method get_filter_attention (line 96) | def get_filter_attention(self, x):
method get_spatial_attention (line 100) | def get_spatial_attention(self, x):
method get_kernel_attention (line 105) | def get_kernel_attention(self, x):
method forward (line 110) | def forward(self, x):
method fuse (line 118) | def fuse(self):
class ODConv2d (line 123) | class ODConv2d(nn.Module):
method __init__ (line 124) | def __init__(self, in_planes, out_planes, kernel_size, stride=1, paddi...
method _initialize_weights (line 146) | def _initialize_weights(self):
method update_temperature (line 150) | def update_temperature(self, temperature):
method _forward_impl_common (line 153) | def _forward_impl_common(self, x):
method _forward_impl_pw1x (line 169) | def _forward_impl_pw1x(self, x):
method forward (line 177) | def forward(self, x):
FILE: yolo-improve/yolov5-backbone/PoolFormer/poolformer.py
function _cfg (line 29) | def _cfg(url='', **kwargs):
class PatchEmbed (line 46) | class PatchEmbed(nn.Module):
method __init__ (line 52) | def __init__(self, patch_size=16, stride=16, padding=0,
method forward (line 62) | def forward(self, x):
class LayerNormChannel (line 68) | class LayerNormChannel(nn.Module):
method __init__ (line 73) | def __init__(self, num_channels, eps=1e-05):
method forward (line 79) | def forward(self, x):
class GroupNorm (line 88) | class GroupNorm(nn.GroupNorm):
method __init__ (line 93) | def __init__(self, num_channels, **kwargs):
class Pooling (line 97) | class Pooling(nn.Module):
method __init__ (line 102) | def __init__(self, pool_size=3):
method forward (line 107) | def forward(self, x):
class Mlp (line 111) | class Mlp(nn.Module):
method __init__ (line 116) | def __init__(self, in_features, hidden_features=None,
method _init_weights (line 127) | def _init_weights(self, m):
method forward (line 133) | def forward(self, x):
class PoolFormerBlock (line 142) | class PoolFormerBlock(nn.Module):
method __init__ (line 156) | def __init__(self, dim, pool_size=3, mlp_ratio=4.,
method forward (line 180) | def forward(self, x):
function basic_blocks (line 194) | def basic_blocks(dim, index, layers,
class PoolFormer (line 219) | class PoolFormer(nn.Module):
method __init__ (line 236) | def __init__(self, layers, embed_dims=None,
method reset_classifier (line 308) | def reset_classifier(self, num_classes):
method forward_embeddings (line 313) | def forward_embeddings(self, x):
method forward_tokens (line 317) | def forward_tokens(self, x):
method forward (line 327) | def forward(self, x):
function update_weight (line 343) | def update_weight(model_dict, weight_dict):
function poolformer_s12 (line 353) | def poolformer_s12(pretrained=False, **kwargs):
function poolformer_s24 (line 376) | def poolformer_s24(pretrained=False, **kwargs):
function poolformer_s36 (line 395) | def poolformer_s36(pretrained=False, **kwargs):
function poolformer_m36 (line 415) | def poolformer_m36(pretrained=False, **kwargs):
function poolformer_m48 (line 437) | def poolformer_m48(pretrained=False, **kwargs):
FILE: yolo-improve/yolov5-backbone/RIFormer/RIFormer.py
class Mlp (line 11) | class Mlp(nn.Module):
method __init__ (line 25) | def __init__(self,
method forward (line 39) | def forward(self, x):
class PatchEmbed (line 47) | class PatchEmbed(nn.Module):
method __init__ (line 62) | def __init__(self,
method forward (line 78) | def forward(self, x):
class Affine (line 84) | class Affine(nn.Module):
method __init__ (line 91) | def __init__(self, in_features):
method forward (line 102) | def forward(self, x):
class RIFormerBlock (line 106) | class RIFormerBlock(BaseModule):
method __init__ (line 124) | def __init__(self,
method forward (line 160) | def forward(self, x):
method fuse_affine (line 177) | def fuse_affine(self, norm, token_mixer):
method get_equivalent_scale_bias (line 185) | def get_equivalent_scale_bias(self):
method switch_to_deploy (line 189) | def switch_to_deploy(self):
function basic_blocks (line 202) | def basic_blocks(dim,
function update_weight (line 232) | def update_weight(model_dict, weight_dict):
class RIFormer (line 243) | class RIFormer(nn.Module):
method __init__ (line 327) | def __init__(self,
method forward_embeddings (line 421) | def forward_embeddings(self, x):
method forward_tokens (line 425) | def forward_tokens(self, x):
method forward (line 435) | def forward(self, x):
FILE: yolo-improve/yolov5-backbone/RepViT/repvit.py
function replace_batchnorm (line 8) | def replace_batchnorm(net):
function _make_divisible (line 19) | def _make_divisible(v, divisor, min_value=None):
class Conv2d_BN (line 38) | class Conv2d_BN(torch.nn.Sequential):
method __init__ (line 39) | def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1,
method fuse_self (line 49) | def fuse_self(self):
class Residual (line 62) | class Residual(torch.nn.Module):
method __init__ (line 63) | def __init__(self, m, drop=0.):
method forward (line 68) | def forward(self, x):
method fuse_self (line 76) | def fuse_self(self):
class RepVGGDW (line 94) | class RepVGGDW(torch.nn.Module):
method __init__ (line 95) | def __init__(self, ed) -> None:
method forward (line 102) | def forward(self, x):
method fuse_self (line 106) | def fuse_self(self):
class RepViTBlock (line 134) | class RepViTBlock(nn.Module):
method __init__ (line 135) | def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, ...
method forward (line 169) | def forward(self, x):
class RepViT (line 172) | class RepViT(nn.Module):
method __init__ (line 173) | def __init__(self, cfgs):
method forward (line 193) | def forward(self, x):
method switch_to_deploy (line 203) | def switch_to_deploy(self):
function update_weight (line 206) | def update_weight(model_dict, weight_dict):
function repvit_m0_9 (line 217) | def repvit_m0_9(weights=''):
function repvit_m1_0 (line 255) | def repvit_m1_0(weights=''):
function repvit_m1_1 (line 293) | def repvit_m1_1(weights=''):
function repvit_m1_5 (line 329) | def repvit_m1_5(weights=''):
function repvit_m2_3 (line 383) | def repvit_m2_3(weights=''):
FILE: yolo-improve/yolov5-backbone/SwinTransformer/SwinTransformer.py
class Mlp (line 17) | class Mlp(nn.Module):
method __init__ (line 20) | def __init__(self, in_features, hidden_features=None, out_features=Non...
method forward (line 29) | def forward(self, x):
function window_partition (line 38) | def window_partition(x, window_size):
function window_reverse (line 53) | def window_reverse(windows, window_size, H, W):
class WindowAttention (line 70) | class WindowAttention(nn.Module):
method __init__ (line 84) | def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scal...
method forward (line 118) | def forward(self, x, mask=None):
class SwinTransformerBlock (line 153) | class SwinTransformerBlock(nn.Module):
method __init__ (line 171) | def __init__(self, dim, num_heads, window_size=7, shift_size=0,
method forward (line 195) | def forward(self, x, mask_matrix):
class PatchMerging (line 255) | class PatchMerging(nn.Module):
method __init__ (line 262) | def __init__(self, dim, norm_layer=nn.LayerNorm):
method forward (line 268) | def forward(self, x, H, W):
class BasicLayer (line 298) | class BasicLayer(nn.Module):
method __init__ (line 317) | def __init__(self,
method forward (line 359) | def forward(self, x, H, W):
class PatchEmbed (line 402) | class PatchEmbed(nn.Module):
method __init__ (line 412) | def __init__(self, patch_size=4, in_chans=3, embed_dim=96, norm_layer=...
method forward (line 426) | def forward(self, x):
class SwinTransformer (line 444) | class SwinTransformer(nn.Module):
method __init__ (line 473) | def __init__(self,
method forward (line 551) | def forward(self, x):
function update_weight (line 578) | def update_weight(model_dict, weight_dict):
function SwinTransformer_Tiny (line 588) | def SwinTransformer_Tiny(weights=''):
FILE: yolo-improve/yolov5-backbone/UniRepLKNet/unireplknet.py
class GRNwithNHWC (line 21) | class GRNwithNHWC(nn.Module):
method __init__ (line 27) | def __init__(self, dim, use_bias=True):
method forward (line 34) | def forward(self, x):
class NCHWtoNHWC (line 43) | class NCHWtoNHWC(nn.Module):
method __init__ (line 44) | def __init__(self):
method forward (line 47) | def forward(self, x):
class NHWCtoNCHW (line 51) | class NHWCtoNCHW(nn.Module):
method __init__ (line 52) | def __init__(self):
method forward (line 55) | def forward(self, x):
function get_conv2d (line 63) | def get_conv2d(in_channels, out_channels, kernel_size, stride, padding, ...
function get_bn (line 88) | def get_bn(dim, use_sync_bn=False):
class SEBlock (line 94) | class SEBlock(nn.Module):
method __init__ (line 99) | def __init__(self, input_channels, internal_neurons):
method forward (line 108) | def forward(self, inputs):
function fuse_bn (line 116) | def fuse_bn(conv, bn):
function convert_dilated_to_nondilated (line 121) | def convert_dilated_to_nondilated(kernel, dilate_rate):
function merge_dilated_into_large_kernel (line 135) | def merge_dilated_into_large_kernel(large_kernel, dilated_kernel, dilate...
class DilatedReparamBlock (line 145) | class DilatedReparamBlock(nn.Module):
method __init__ (line 150) | def __init__(self, channels, kernel_size, deploy, use_sync_bn=False, a...
method forward (line 191) | def forward(self, x):
method merge_dilated_branches (line 201) | def merge_dilated_branches(self):
class UniRepLKNetBlock (line 222) | class UniRepLKNetBlock(nn.Module):
method __init__ (line 224) | def __init__(self,
method forward (line 290) | def forward(self, inputs):
method reparameterize (line 306) | def reparameterize(self):
class UniRepLKNet (line 364) | class UniRepLKNet(nn.Module):
method __init__ (line 383) | def __init__(self,
method _init_weights (line 454) | def _init_weights(self, m):
method forward (line 460) | def forward(self, x):
method switch_to_deploy (line 478) | def switch_to_deploy(self):
class LayerNorm (line 485) | class LayerNorm(nn.Module):
method __init__ (line 493) | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_l...
method forward (line 504) | def forward(self, x):
function update_weight (line 514) | def update_weight(model_dict, weight_dict):
function unireplknet_a (line 524) | def unireplknet_a(weights='', **kwargs):
function unireplknet_f (line 530) | def unireplknet_f(weights='', **kwargs):
function unireplknet_p (line 536) | def unireplknet_p(weights='', **kwargs):
function unireplknet_n (line 542) | def unireplknet_n(weights='', **kwargs):
function unireplknet_t (line 548) | def unireplknet_t(weights='', **kwargs):
function unireplknet_s (line 554) | def unireplknet_s(weights='', **kwargs):
function unireplknet_b (line 560) | def unireplknet_b(weights='', **kwargs):
function unireplknet_l (line 566) | def unireplknet_l(weights='', **kwargs):
function unireplknet_xl (line 572) | def unireplknet_xl(weights='', **kwargs):
FILE: yolo-improve/yolov5-backbone/VanillaNet/VanillaNet.py
class activation (line 15) | class activation(nn.ReLU):
method __init__ (line 16) | def __init__(self, dim, act_num=3, deploy=False):
method forward (line 26) | def forward(self, x):
method _fuse_bn_tensor (line 36) | def _fuse_bn_tensor(self, weight, bn):
method switch_to_deploy (line 47) | def switch_to_deploy(self):
class Block (line 57) | class Block(nn.Module):
method __init__ (line 58) | def __init__(self, dim, dim_out, act_num=3, stride=2, deploy=False, ad...
method forward (line 81) | def forward(self, x):
method _fuse_bn_tensor (line 93) | def _fuse_bn_tensor(self, conv, bn):
method switch_to_deploy (line 105) | def switch_to_deploy(self):
class VanillaNet (line 121) | class VanillaNet(nn.Module):
method __init__ (line 122) | def __init__(self, in_chans=3, num_classes=1000, dims=[96, 192, 384, 7...
method _init_weights (line 156) | def _init_weights(self, m):
method change_act (line 161) | def change_act(self, m):
method forward (line 166) | def forward(self, x):
method _fuse_bn_tensor (line 184) | def _fuse_bn_tensor(self, conv, bn):
method switch_to_deploy (line 196) | def switch_to_deploy(self):
function update_weight (line 214) | def update_weight(model_dict, weight_dict):
function vanillanet_5 (line 224) | def vanillanet_5(pretrained='',in_22k=False, **kwargs):
function vanillanet_6 (line 231) | def vanillanet_6(pretrained='',in_22k=False, **kwargs):
function vanillanet_7 (line 238) | def vanillanet_7(pretrained='',in_22k=False, **kwargs):
function vanillanet_8 (line 245) | def vanillanet_8(pretrained='', in_22k=False, **kwargs):
function vanillanet_9 (line 252) | def vanillanet_9(pretrained='', in_22k=False, **kwargs):
function vanillanet_10 (line 259) | def vanillanet_10(pretrained='', in_22k=False, **kwargs):
function vanillanet_11 (line 269) | def vanillanet_11(pretrained='', in_22k=False, **kwargs):
function vanillanet_12 (line 279) | def vanillanet_12(pretrained='', in_22k=False, **kwargs):
function vanillanet_13 (line 289) | def vanillanet_13(pretrained='', in_22k=False, **kwargs):
function vanillanet_13_x1_5 (line 299) | def vanillanet_13_x1_5(pretrained='', in_22k=False, **kwargs):
function vanillanet_13_x1_5_ada_pool (line 309) | def vanillanet_13_x1_5_ada_pool(pretrained='', in_22k=False, **kwargs):
FILE: yolo-improve/yolov5-backbone/fasternet/fasternet.py
class Partial_conv3 (line 15) | class Partial_conv3(nn.Module):
method __init__ (line 17) | def __init__(self, dim, n_div, forward):
method forward_slicing (line 30) | def forward_slicing(self, x: Tensor) -> Tensor:
method forward_split_cat (line 37) | def forward_split_cat(self, x: Tensor) -> Tensor:
class MLPBlock (line 46) | class MLPBlock(nn.Module):
method __init__ (line 48) | def __init__(self,
method forward (line 88) | def forward(self, x: Tensor) -> Tensor:
method forward_layer_scale (line 94) | def forward_layer_scale(self, x: Tensor) -> Tensor:
class BasicStage (line 102) | class BasicStage(nn.Module):
method __init__ (line 104) | def __init__(self,
method forward (line 134) | def forward(self, x: Tensor) -> Tensor:
class PatchEmbed (line 139) | class PatchEmbed(nn.Module):
method __init__ (line 141) | def __init__(self, patch_size, patch_stride, in_chans, embed_dim, norm...
method forward (line 149) | def forward(self, x: Tensor) -> Tensor:
class PatchMerging (line 154) | class PatchMerging(nn.Module):
method __init__ (line 156) | def __init__(self, patch_size2, patch_stride2, dim, norm_layer):
method forward (line 164) | def forward(self, x: Tensor) -> Tensor:
class FasterNet (line 169) | class FasterNet(nn.Module):
method __init__ (line 170) | def __init__(self,
method forward (line 262) | def forward(self, x: Tensor) -> Tensor:
function update_weight (line 274) | def update_weight(model_dict, weight_dict):
function fasternet_t0 (line 284) | def fasternet_t0(weights=None, cfg='models/faster_cfg/fasternet_t0.yaml'):
function fasternet_t1 (line 293) | def fasternet_t1(weights=None, cfg='models/faster_cfg/fasternet_t1.yaml'):
function fasternet_t2 (line 302) | def fasternet_t2(weights=None, cfg='models/faster_cfg/fasternet_t2.yaml'):
function fasternet_s (line 311) | def fasternet_s(weights=None, cfg='models/faster_cfg/fasternet_s.yaml'):
function fasternet_m (line 320) | def fasternet_m(weights=None, cfg='models/faster_cfg/fasternet_m.yaml'):
function fasternet_l (line 329) | def fasternet_l(weights=None, cfg='models/faster_cfg/fasternet_l.yaml'):
FILE: yolo-improve/yolov5-backbone/inceptionnext/inceptionnext.py
class InceptionDWConv2d (line 19) | class InceptionDWConv2d(nn.Module):
method __init__ (line 22) | def __init__(self, in_channels, square_kernel_size=3, band_kernel_size...
method forward (line 31) | def forward(self, x):
class ConvMlp (line 39) | class ConvMlp(nn.Module):
method __init__ (line 43) | def __init__(
method forward (line 57) | def forward(self, x):
class MlpHead (line 66) | class MlpHead(nn.Module):
method __init__ (line 69) | def __init__(self, dim, num_classes=1000, mlp_ratio=3, act_layer=nn.GELU,
method forward (line 79) | def forward(self, x):
class MetaNeXtBlock (line 89) | class MetaNeXtBlock(nn.Module):
method __init__ (line 97) | def __init__(
method forward (line 116) | def forward(self, x):
class MetaNeXtStage (line 127) | class MetaNeXtStage(nn.Module):
method __init__ (line 128) | def __init__(
method forward (line 164) | def forward(self, x):
class MetaNeXt (line 173) | class MetaNeXt(nn.Module):
method __init__ (line 191) | def __init__(
method set_grad_checkpointing (line 248) | def set_grad_checkpointing(self, enable=True):
method no_weight_decay (line 253) | def no_weight_decay(self):
method forward (line 256) | def forward(self, x):
method _init_weights (line 268) | def _init_weights(self, m):
function _cfg (line 274) | def _cfg(url='', **kwargs):
function update_weight (line 284) | def update_weight(model_dict, weight_dict):
function inceptionnext_tiny (line 310) | def inceptionnext_tiny(pretrained=False, **kwargs):
function inceptionnext_small (line 321) | def inceptionnext_small(pretrained=False, **kwargs):
function inceptionnext_base (line 332) | def inceptionnext_base(pretrained=False, **kwargs):
function inceptionnext_base_384 (line 343) | def inceptionnext_base_384(pretrained=False, **kwargs):
FILE: yolo-improve/yolov5-backbone/yolo.py
function parse_model (line 1) | def parse_model(d, ch): # model_dict, input_channels(3)
function _forward_once (line 84) | def _forward_once(self, x, profile=False, visualize=False):
FILE: yolo-improve/yolov5-dyhead.py
function _make_divisible (line 9) | def _make_divisible(v, divisor, min_value=None):
class swish (line 19) | class swish(nn.Module):
method forward (line 20) | def forward(self, x):
class h_swish (line 24) | class h_swish(nn.Module):
method __init__ (line 25) | def __init__(self, inplace=False):
method forward (line 29) | def forward(self, x):
class h_sigmoid (line 33) | class h_sigmoid(nn.Module):
method __init__ (line 34) | def __init__(self, inplace=True, h_max=1):
method forward (line 39) | def forward(self, x):
class DyReLU (line 43) | class DyReLU(nn.Module):
method __init__ (line 44) | def __init__(self, inp, reduction=4, lambda_a=1.0, K2=True, use_bias=T...
method forward (line 82) | def forward(self, x):
class DyDCNv2 (line 126) | class DyDCNv2(nn.Module):
method __init__ (line 139) | def __init__(self,
method forward (line 152) | def forward(self, x, offset, mask):
class DyHeadBlock (line 160) | class DyHeadBlock(nn.Module):
method __init__ (line 166) | def __init__(self,
method _init_weights (line 193) | def _init_weights(self):
method forward (line 200) | def forward(self, x):
FILE: yolo-improve/yolov5-res2block.py
class Bottle2neck (line 1) | class Bottle2neck(nn.Module):
method __init__ (line 4) | def __init__(self, inplanes, planes, shortcut, baseWidth=26, scale = 4):
method forward (line 33) | def forward(self, x):
class C3_Res2Block (line 58) | class C3_Res2Block(C3):
method __init__ (line 60) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ...
FILE: yolo-improve/yolov5-softnms.py
function box_iou_for_nms (line 1) | def box_iou_for_nms(box1, box2, GIoU=False, DIoU=False, CIoU=False, SIoU...
function soft_nms (line 58) | def soft_nms(bboxes, scores, iou_thresh=0.5,sigma=0.5,score_threshold=0....
FILE: yolo-improve/yolov7-CoordConv.py
class AddCoords (line 1) | class AddCoords(nn.Module):
method __init__ (line 2) | def __init__(self, with_r=False):
method forward (line 6) | def forward(self, input_tensor):
class CoordConv (line 36) | class CoordConv(nn.Module):
method __init__ (line 37) | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,...
method forward (line 45) | def forward(self, x):
FILE: yolo-improve/yolov7-DBB.py
function transI_fusebn (line 2) | def transI_fusebn(kernel, bn):
function transII_addbranch (line 7) | def transII_addbranch(kernels, biases):
function transIII_1x1_kxk (line 10) | def transIII_1x1_kxk(k1, b1, k2, b2, groups):
function transIV_depthconcat (line 28) | def transIV_depthconcat(kernels, biases):
function transV_avg (line 31) | def transV_avg(channels, kernel_size, groups):
function transVI_multiscale (line 38) | def transVI_multiscale(kernel, target_kernel_size):
function conv_bn (line 43) | def conv_bn(in_channels, out_channels, kernel_size, stride=1, padding=0,...
class IdentityBasedConv1x1 (line 55) | class IdentityBasedConv1x1(nn.Conv2d):
method __init__ (line 56) | def __init__(self, channels, groups=1):
method forward (line 67) | def forward(self, input):
method get_actual_kernel (line 72) | def get_actual_kernel(self):
class BNAndPadLayer (line 76) | class BNAndPadLayer(nn.Module):
method __init__ (line 77) | def __init__(self,
method forward (line 88) | def forward(self, input):
method weight (line 104) | def weight(self):
method bias (line 108) | def bias(self):
method running_mean (line 112) | def running_mean(self):
method running_var (line 116) | def running_var(self):
method eps (line 120) | def eps(self):
class DiverseBranchBlock (line 124) | class DiverseBranchBlock(nn.Module):
method __init__ (line 125) | def __init__(self, in_channels, out_channels, k,
method get_equivalent_kernel_bias (line 178) | def get_equivalent_kernel_bias(self):
method switch_to_deploy (line 205) | def switch_to_deploy(self):
method forward (line 222) | def forward(self, inputs):
method init_gamma (line 233) | def init_gamma(self, gamma_value):
method single_init (line 243) | def single_init(self):
FILE: yolo-improve/yolov7-DCN.py
class DCNv2 (line 1) | class DCNv2(nn.Module):
method __init__ (line 2) | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
method forward (line 34) | def forward(self, x):
method reset_parameters (line 56) | def reset_parameters(self):
FILE: yolo-improve/yolov7-DCNV3.py
class DCNV3_YoLo (line 2) | class DCNV3_YoLo(nn.Module):
method __init__ (line 3) | def __init__(self, inc, ouc, k=1, s=1, p=None, g=1, act=True):
method forward (line 11) | def forward(self, x):
FILE: yolo-improve/yolov7-DSConv.py
class DSConv (line 5) | class DSConv(_ConvNd):
method __init__ (line 6) | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
method get_weight_res (line 34) | def get_weight_res(self):
method forward (line 74) | def forward(self, input):
class DSConv2D (line 83) | class DSConv2D(Conv):
method __init__ (line 84) | def __init__(self, inc, ouc, k=1, s=1, p=None, g=1, act=True):
FILE: yolo-improve/yolov7-DecoupledHead.py
class IDetect_Decoupled (line 1) | class IDetect_Decoupled(nn.Module):
method __init__ (line 8) | def __init__(self, nc=80, anchors=(), ch=()): # detection layer
method forward (line 33) | def forward(self, x):
method fuseforward (line 58) | def fuseforward(self, x):
method fuse (line 101) | def fuse(self):
method _make_grid (line 136) | def _make_grid(nx=20, ny=20):
method convert (line 140) | def convert(self, z):
function _initialize_biases (line 152) | def _initialize_biases(self, cf=None): # initialize biases into Detect(...
FILE: yolo-improve/yolov7-DySnakeConv.py
class DySnakeConv (line 1) | class DySnakeConv(nn.Module):
method __init__ (line 2) | def __init__(self, inc, ouc, k=3, act=True) -> None:
method forward (line 10) | def forward(self, x):
class DSConv (line 13) | class DSConv(nn.Module):
method __init__ (line 14) | def __init__(self, in_ch, out_ch, morph, kernel_size=3, if_offset=True...
method forward (line 54) | def forward(self, f):
class DSC (line 75) | class DSC(object):
method __init__ (line 76) | def __init__(self, input_shape, kernel_size, extend_scope, morph):
method _coordinate_map_3D (line 96) | def _coordinate_map_3D(self, offset, if_offset):
method _bilinear_interpolate_3D (line 241) | def _bilinear_interpolate_3D(self, input_feature, y, x):
method deform_conv (line 341) | def deform_conv(self, input, offset, if_offset):
FILE: yolo-improve/yolov7-MPDiou.py
function bbox_mpdiou (line 1) | def bbox_mpdiou(box1, box2, x1y1x2y2=True, mpdiou_hw=None, grid=None, ep...
FILE: yolo-improve/yolov7-NWD.py
function wasserstein_loss (line 1) | def wasserstein_loss(pred, target, eps=1e-7, constant=12.8):
FILE: yolo-improve/yolov7-PConv.py
class PConv (line 1) | class PConv(nn.Module):
method __init__ (line 2) | def __init__(self, dim, ouc, n_div=4, forward='split_cat'):
method forward_slicing (line 16) | def forward_slicing(self, x):
method forward_split_cat (line 23) | def forward_split_cat(self, x):
FILE: yolo-improve/yolov7-RFEM.py
class TridentBlock (line 1) | class TridentBlock(nn.Module):
method __init__ (line 2) | def __init__(self, c1, c2, stride=1, c=False, e=0.5, padding=[1, 2, 3]...
method forward_for_small (line 28) | def forward_for_small(self, x):
method forward_for_middle (line 42) | def forward_for_middle(self, x):
method forward_for_big (line 56) | def forward_for_big(self, x):
method forward (line 70) | def forward(self, x):
class RFEM (line 88) | class RFEM(nn.Module):
method __init__ (line 89) | def __init__(self, c1, c2, n=1, e=0.5, stride=1):
method forward (line 101) | def forward(self, x):
FILE: yolo-improve/yolov7-RepNCSPELAN.py
class RepConvN (line 1) | class RepConvN(nn.Module):
method __init__ (line 7) | def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False...
method forward_fuse (line 19) | def forward_fuse(self, x):
method forward (line 23) | def forward(self, x):
method get_equivalent_kernel_bias (line 28) | def get_equivalent_kernel_bias(self):
method _avg_to_3x3_tensor (line 34) | def _avg_to_3x3_tensor(self, avgp):
method _pad_1x1_to_3x3_tensor (line 43) | def _pad_1x1_to_3x3_tensor(self, kernel1x1):
method _fuse_bn_tensor (line 49) | def _fuse_bn_tensor(self, branch):
method fuse_convs (line 76) | def fuse_convs(self):
class RepNBottleneck (line 101) | class RepNBottleneck(nn.Module):
method __init__ (line 103) | def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5, act=Tr...
method forward (line 110) | def forward(self, x):
class RepNCSP (line 113) | class RepNCSP(nn.Module):
method __init__ (line 115) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, act=True): ...
method forward (line 123) | def forward(self, x):
class RepNCSPELAN4 (line 126) | class RepNCSPELAN4(nn.Module):
method __init__ (line 128) | def __init__(self, c1, c2, c3, c4, c5=1, act=True): # ch_in, ch_out, ...
method forward (line 136) | def forward(self, x):
method forward_split (line 141) | def forward_split(self, x):
FILE: yolo-improve/yolov7-SAConv.py
class ConvAWS2d (line 1) | class ConvAWS2d(nn.Conv2d):
method __init__ (line 2) | def __init__(self,
method _get_weight (line 23) | def _get_weight(self, weight):
method forward (line 32) | def forward(self, x):
method _load_from_state_dict (line 36) | def _load_from_state_dict(self, state_dict, prefix, local_metadata, st...
class SAConv2d (line 50) | class SAConv2d(ConvAWS2d):
method __init__ (line 51) | def __init__(self,
method forward (line 98) | def forward(self, x):
FILE: yolo-improve/yolov7-asf.py
class Zoom_cat (line 2) | class Zoom_cat(nn.Module):
method __init__ (line 3) | def __init__(self):
method forward (line 6) | def forward(self, x):
class ScalSeq (line 15) | class ScalSeq(nn.Module):
method __init__ (line 16) | def __init__(self, inc, channel):
method forward (line 26) | def forward(self, x):
class Add (line 44) | class Add(nn.Module):
method __init__ (line 46) | def __init__(self):
method forward (line 49) | def forward(self, x):
class channel_att (line 54) | class channel_att(nn.Module):
method __init__ (line 55) | def __init__(self, channel, b=1, gamma=2):
method forward (line 64) | def forward(self, x):
class local_att (line 72) | class local_att(nn.Module):
method __init__ (line 73) | def __init__(self, channel, reduction=16):
method forward (line 87) | def forward(self, x):
class attention_model (line 103) | class attention_model(nn.Module):
method __init__ (line 105) | def __init__(self, ch = 256):
method forward (line 109) | def forward(self, x):
FILE: yolo-improve/yolov7-iou.py
class WIoU_Scale (line 4) | class WIoU_Scale:
method __init__ (line 17) | def __init__(self, iou):
method _update (line 22) | def _update(cls, self):
method _scaled_loss (line 27) | def _scaled_loss(cls, self, gamma=1.9, delta=3):
function bbox_iou (line 37) | def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=Fal...
FILE: yolo-improve/yolov7-odconv.py
class Attention (line 7) | class Attention(nn.Module):
method __init__ (line 8) | def __init__(self, in_planes, out_planes, kernel_size, groups=1, reduc...
method _initialize_weights (line 41) | def _initialize_weights(self):
method update_temperature (line 51) | def update_temperature(self, temperature):
method skip (line 55) | def skip(_):
method get_channel_attention (line 58) | def get_channel_attention(self, x):
method get_filter_attention (line 62) | def get_filter_attention(self, x):
method get_spatial_attention (line 66) | def get_spatial_attention(self, x):
method get_kernel_attention (line 71) | def get_kernel_attention(self, x):
method forward (line 76) | def forward(self, x):
class ODConv2d (line 82) | class ODConv2d(nn.Module):
method __init__ (line 83) | def __init__(self, in_planes, out_planes, k, s=1, p=None, g=1, act=Tru...
method _initialize_weights (line 107) | def _initialize_weights(self):
method update_temperature (line 111) | def update_temperature(self, temperature):
method _forward_impl_common (line 114) | def _forward_impl_common(self, x):
method _forward_impl_pw1x (line 130) | def _forward_impl_pw1x(self, x):
method forward (line 138) | def forward(self, x):
FILE: yolo-improve/yolov7-slimneck.py
class GSConv (line 1) | class GSConv(nn.Module):
method __init__ (line 4) | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
method forward (line 10) | def forward(self, x):
class GSBottleneck (line 26) | class GSBottleneck(nn.Module):
method __init__ (line 28) | def __init__(self, c1, c2, k=3, s=1, e=0.5):
method forward (line 37) | def forward(self, x):
class GSBottleneckC (line 40) | class GSBottleneckC(GSBottleneck):
method __init__ (line 42) | def __init__(self, c1, c2, k=3, s=1):
class VoVGSCSP (line 46) | class VoVGSCSP(nn.Module):
method __init__ (line 48) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
method forward (line 58) | def forward(self, x):
class VoVGSCSPC (line 63) | class VoVGSCSPC(VoVGSCSP):
method __init__ (line 65) | def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
FILE: yolo-improve/yolov7-softnms.py
function box_iou_for_nms (line 1) | def box_iou_for_nms(box1, box2, GIoU=False, DIoU=False, CIoU=False, SIoU...
function soft_nms (line 58) | def soft_nms(bboxes, scores, iou_thresh=0.5,sigma=0.5,score_threshold=0....
FILE: yolo-improve/yolov8-DCN.py
class DCNv2 (line 1) | class DCNv2(nn.Module):
method __init__ (line 2) | def __init__(self, in_channels, out_channels, kernel_size, stride=1,
method forward (line 34) | def forward(self, x):
method reset_parameters (line 56) | def reset_parameters(self):
class Bottleneck_DCN (line 66) | class Bottleneck_DCN(nn.Module):
method __init__ (line 68) | def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5): # ch...
method forward (line 81) | def forward(self, x):
class C2f_DCN (line 84) | class C2f_DCN(nn.Module):
method __init__ (line 86) | def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5): # ch_in,...
method forward (line 93) | def forward(self, x):
FILE: yolo-improve/yolov8-erf.py
function get_activation (line 16) | def get_activation(feat, backbone_idx=-1):
function letterbox (line 25) | def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True...
function get_rectangle (line 57) | def get_rectangle(data, thresh):
function heatmap (line 67) | def heatmap(data, camp='RdYlGn', figsize=(10, 10.75), ax=None, save_path...
class yolov8_erf (line 76) | class yolov8_erf:
method __init__ (line 79) | def __init__(self, weight, device, layer, dataset, num_images, save_pa...
method get_input_grad (line 99) | def get_input_grad(self, samples):
method process (line 112) | def process(self):
function get_params (line 160) | def get_params():
FILE: yolo-improve/yolov8-objectcount.py
function get_video_cfg (line 7) | def get_video_cfg(path):
function plot_and_counting (line 13) | def plot_and_counting(result):
FILE: yolo-improve/yolov8-track.py
function get_video_cfg (line 9) | def get_video_cfg(path):
function counting (line 15) | def counting(image_plot, result):
function transform_mot (line 20) | def transform_mot(result):
FILE: yolo-improve/yolov9-backbone/yolo.py
function _forward_once (line 1) | def _forward_once(self, x, profile=False, visualize=False):
function parse_model (line 30) | def parse_model(d, ch): # model_dict, input_channels(3)
FILE: yolo/dataset/xml2txt.py
function convert (line 9) | def convert(size, box):
function convert_annotation (line 23) | def convert_annotation(xmlpath, xmlname):
Condensed preview — 351 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (2,418K chars).
[
{
"path": ".gitignore",
"chars": 2326,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": "Ultralytics-YOLO-project.md",
"chars": 16430,
"preview": "# Ultralytics-YOLO项目详细说明\n\n1. 本项目集成了YOLOv8、v10、v11、v12乃至前沿的YOLO26等全系列基础模型。 无论是做横向对比实验,还是纵向的版本改进,无需到处找资源,一个项目就能满足你所有的实验需求!"
},
{
"path": "bilibili-guide.md",
"chars": 17846,
"preview": "# 魔鬼面具-哔哩哔哩视频指南\n\n### 必看干货系列(建议搞深度学习的小伙伴都看看,特别是图像相关)\n1. [深度学习常见实验问题与实验技巧(适用于所有模型,小白初学者必看!)](https://www.bilibili.com/vide"
},
{
"path": "cv-attention/A2Attention.py",
"chars": 2070,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\nfrom torch.nn import functional as F\n\n\n\nc"
},
{
"path": "cv-attention/BAM.py",
"chars": 3746,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\n\ndef autopad(k, p=None, d=1): # kernel, "
},
{
"path": "cv-attention/Biformer.py",
"chars": 24965,
"preview": "\"\"\"\nCore of BiFormer, Bi-Level Routing Attention.\n\nTo be refactored.\n\nauthor: ZHU Lei\ngithub: https://github.com/rayleiz"
},
{
"path": "cv-attention/CAA.py",
"chars": 1873,
"preview": "import torch.nn as nn\n\ndef autopad(k, p=None, d=1): # kernel, padding, dilation\n \"\"\"Pad to 'same' shape outputs.\"\"\"\n"
},
{
"path": "cv-attention/CBAM.py",
"chars": 2470,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\n\n\nclass ChannelAttention(nn.Module):\n "
},
{
"path": "cv-attention/CPCA.py",
"chars": 2635,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nclass CPCA_ChannelAttention(nn.Module):\n\n def __i"
},
{
"path": "cv-attention/CloAttention.py",
"chars": 4296,
"preview": "import torch\nimport torch.nn as nn\nfrom efficientnet_pytorch.model import MemoryEfficientSwish\n\nclass AttnMap(nn.Module)"
},
{
"path": "cv-attention/CoTAttention.py",
"chars": 1697,
"preview": "import numpy as np\nimport torch\nfrom torch import flatten, nn\nfrom torch.nn import init\nfrom torch.nn.modules.activation"
},
{
"path": "cv-attention/CoordAttention.py",
"chars": 1710,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass h_sigmoid(nn.Module):\n def __init__(self, "
},
{
"path": "cv-attention/DAttention.py",
"chars": 8909,
"preview": "import torch, einops\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport numpy as np\nfrom timm.models.layers im"
},
{
"path": "cv-attention/ECA.py",
"chars": 846,
"preview": "import torch, math\nfrom torch import nn\n\nclass EfficientChannelAttention(nn.Module): # Efficient Channel Atten"
},
{
"path": "cv-attention/ELA.py",
"chars": 638,
"preview": "import torch.nn as nn\n\nclass ELA(nn.Module):\n def __init__(self, channels) -> None:\n super().__init__()\n "
},
{
"path": "cv-attention/EMA.py",
"chars": 1671,
"preview": "import torch\nfrom torch import nn\n\nclass EMA(nn.Module):\n def __init__(self, channels, factor=8):\n super(EMA, "
},
{
"path": "cv-attention/EffectiveSE.py",
"chars": 858,
"preview": "import torch\nfrom torch import nn as nn\nfrom timm.models.layers.create_act import create_act_layer\n\n\nclass EffectiveSEMo"
},
{
"path": "cv-attention/GAM.py",
"chars": 1304,
"preview": "import torch.nn as nn\nimport torch\n \nclass GAM_Attention(nn.Module):\n def __init__(self, in_channels, rate=4):\n "
},
{
"path": "cv-attention/GC.py",
"chars": 2373,
"preview": "import torch\nfrom torch import nn as nn\nimport torch.nn.functional as F\nfrom timm.models.layers.create_act import create"
},
{
"path": "cv-attention/GE.py",
"chars": 3490,
"preview": "import math, torch\nfrom torch import nn as nn\nimport torch.nn.functional as F\n\nfrom timm.models.layers.create_act import"
},
{
"path": "cv-attention/LSKA.py",
"chars": 3621,
"preview": "import torch.nn as nn\n\nclass LSKA(nn.Module):\n # Large-Separable-Kernel-Attention\n # https://github.com/StevenLauH"
},
{
"path": "cv-attention/LSKBlock.py",
"chars": 1062,
"preview": "import torch\nimport torch.nn as nn\n\nclass LSKblock(nn.Module):\n def __init__(self, dim):\n super().__init__()\n "
},
{
"path": "cv-attention/MHSA.py",
"chars": 2166,
"preview": "import torch\nimport torch.nn as nn\n\nclass MHSA(nn.Module):\n def __init__(self, n_dims, width=14, height=14, heads=4, "
},
{
"path": "cv-attention/MLCA.py",
"chars": 2264,
"preview": "import math, torch\nfrom torch import nn\nimport torch.nn.functional as F\n\nclass MLCA(nn.Module):\n def __init__(self, i"
},
{
"path": "cv-attention/MobileViTAttention.py",
"chars": 3613,
"preview": "from torch import nn\nimport torch\nfrom einops import rearrange\n\n\nclass PreNorm(nn.Module):\n def __init__(self, dim, f"
},
{
"path": "cv-attention/ParNetAttention.py",
"chars": 1015,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\n\n\nclass ParNetAttention(nn.Module):\n\n "
},
{
"path": "cv-attention/PolarizedSelfAttention.py",
"chars": 2107,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\n\n\n\nclass ParallelPolarizedSelfAttention(n"
},
{
"path": "cv-attention/S2Attention.py",
"chars": 2360,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\n\n\ndef spatial_shift1(x):\n b, w, h, c ="
},
{
"path": "cv-attention/SE.py",
"chars": 1362,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\n\n\n\nclass SEAttention(nn.Module):\n\n def"
},
{
"path": "cv-attention/SGE.py",
"chars": 1729,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\n\nclass SpatialGroupEnhance(nn.Module):\n "
},
{
"path": "cv-attention/SK.py",
"chars": 1866,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\nfrom collections import OrderedDict\n\n\ncla"
},
{
"path": "cv-attention/SequentialSelfAttention.py",
"chars": 2110,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\n\nclass SequentialPolarizedSelfAttention(n"
},
{
"path": "cv-attention/ShuffleAttention.py",
"chars": 2632,
"preview": "import numpy as np\nimport torch\nfrom torch import nn\nfrom torch.nn import init\nfrom torch.nn.parameter import Parameter\n"
},
{
"path": "cv-attention/SimAM.py",
"chars": 858,
"preview": "import torch\nimport torch.nn as nn\n\n\nclass SimAM(torch.nn.Module):\n def __init__(self, e_lambda=1e-4):\n super("
},
{
"path": "cv-attention/TripletAttention.py",
"chars": 2370,
"preview": "import torch\nimport torch.nn as nn\n\n\nclass BasicConv(nn.Module):\n def __init__(self, in_planes, out_planes, kernel_si"
},
{
"path": "cv-attention/readme.md",
"chars": 3400,
"preview": "# CV-Attention\n关于CV的一些经典注意力机制代码。 \n目前代码格式主要用于yolov3,yolov5,yolov7,yolov8.\n\n# Supports\n| name | need_chaneel | paper |\n| "
},
{
"path": "cvpr2025-deim-project.md",
"chars": 21144,
"preview": "# 2025-SOTA目标检测模型项目(2026发论文必备项目)\n\n鉴于目前YOLO系列模型反映的拒稿率越来越高且YOLO模型确实非常泛滥,无论是不是计算机专业、是不是小白都基本可以快速上手YOLO模型,导致计算机专业和有期刊级别要求的小伙"
},
{
"path": "damo-yolo/Annotations/ReadMe.md",
"chars": 15,
"preview": "# 存放VOC标注格式的文件夹"
},
{
"path": "damo-yolo/JPEGImages/ReadMe.md",
"chars": 10,
"preview": "# 存放图像的文件夹"
},
{
"path": "damo-yolo/readme.md",
"chars": 321,
"preview": "# DAMO-YOLO的数据集处理文件\n本目录下的脚本是针对与DAMO-YOLO的数据集处理脚本,支持如下:\n1. VOC标注格式转换为COCO标注格式,并生成train.json,val.json,test.json.\n\n# 使用方法\n1"
},
{
"path": "damo-yolo/voc2coco.py",
"chars": 6083,
"preview": "import os\nimport glob\nimport json\nimport shutil\nimport numpy as np\nimport xml.etree.ElementTree as ET\n \nSTART_BOUNDING_B"
},
{
"path": "data-offline-aug/object_detection_data_aug.py",
"chars": 8600,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nimport os, shutil, cv2, tqdm\nimport numpy as np\nimport albumentations "
},
{
"path": "data-offline-aug/readme.md",
"chars": 395,
"preview": "# data-offline-aug\n\n### 环境\n\n pip install -i https://pypi.tuna.tsinghua.edu.cn/simple albumentations\n\n### 1. object_de"
},
{
"path": "data-offline-aug/segment_data_aug.py",
"chars": 6762,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nimport os, shutil, cv2, tqdm\nimport numpy as np\nnp.random.seed(0)\nimpo"
},
{
"path": "mmdet-course/config/atss_r50_fpn_dyhead_1x_visdrone.py",
"chars": 1974,
"preview": "_base_ = 'atss_r50_fpn_dyhead_1x_coco.py'\n\nmodel = dict(\n bbox_head=dict(\n num_classes=10\n )\n)\n\n# 修改数据集相关配置"
},
{
"path": "mmdet-course/config/cascade-rcnn_r50_fpn_1x_visdrone.py",
"chars": 2372,
"preview": "_base_ = './cascade-rcnn_r50_fpn_1x_coco.py'\n\n# 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数\nmodel = dict(\n roi_head=dict("
},
{
"path": "mmdet-course/config/ddq-detr-4scale_r50_8xb2-12e_visdrone.py",
"chars": 2028,
"preview": "_base_ = 'ddq-detr-4scale_r50_8xb2-12e_coco.py'\n\nmodel = dict(\n bbox_head=dict(\n type='DDQDETRHead',\n n"
},
{
"path": "mmdet-course/config/dino-4scale_r50_8xb2-12e_visdrone.py",
"chars": 1986,
"preview": "_base_ = 'dino-4scale_r50_8xb2-12e_coco.py'\n\nmodel = dict(\n bbox_head=dict(\n type='DINOHead',\n num_clas"
},
{
"path": "mmdet-course/config/faster-rcnn_r50_fpn_ciou_1x_visdrone.py",
"chars": 2135,
"preview": "_base_ = 'faster-rcnn_r50_fpn_ciou_1x_coco.py'\n\n# 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数\nmodel = dict(\n roi_head=dic"
},
{
"path": "mmdet-course/config/gfl_r50_fpn_1x_visdrone.py",
"chars": 2026,
"preview": "_base_ = 'gfl_r50_fpn_1x_coco.py'\n\n# 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数\nmodel = dict(\n bbox_head=dict(\n n"
},
{
"path": "mmdet-course/config/retinanet_r50_fpn_1x_visdrone.py",
"chars": 2099,
"preview": "_base_ = 'retinanet_r50_fpn_1x_coco.py'\n\n# 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数\nmodel = dict(\n bbox_head=dict(\n "
},
{
"path": "mmdet-course/config/rtmdet_tiny_8xb32-300e_visdrone.py",
"chars": 2081,
"preview": "_base_ = 'rtmdet_tiny_8xb32-300e_coco.py'\n\nmodel = dict(\n bbox_head=dict(\n num_classes=10\n )\n)\n\n# 修改数据集相关配置"
},
{
"path": "mmdet-course/config/tood_r50_fpn_1x_visdrone.py",
"chars": 1959,
"preview": "_base_ = './tood_r50_fpn_1x_coco.py'\n\n# 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数\nmodel = dict(\n bbox_head=dict(\n "
},
{
"path": "mmdet-course/config/yolox_tiny_8xb8-300e_visdrone.py",
"chars": 5364,
"preview": "_base_ = './yolox_tiny_8xb8-300e_coco.py'\n\n# 我们还需要更改 head 中的 num_classes 以匹配数据集中的类别数\nmodel = dict(\n bbox_head=dict(\n "
},
{
"path": "mmdet-course/mmdet2yolo.py",
"chars": 12257,
"preview": "import os, torch, cv2, math, tqdm, time, shutil, argparse, json, pickle\nimport numpy as np\nfrom prettytable import Prett"
},
{
"path": "mmdet-course/readme.md",
"chars": 2705,
"preview": "# mmdet使用教程\n\n### mmdet教程命令\n\n1. conda create -n mmdet_py39 python=3.9 anaconda\n2. https://mmdetection.readthedocs.io/en/l"
},
{
"path": "mmdet-course/yolo2coco.py",
"chars": 6994,
"preview": "import os\nimport cv2\nimport json\nfrom tqdm import tqdm\nfrom sklearn.model_selection import train_test_split\nimport argpa"
},
{
"path": "module-info/CVPR2023-SMPConv.md",
"chars": 1920,
"preview": "# SMPConv模块总结 https://arxiv.org/pdf/2304.02330\n\n## 1. 背景\n\n### 连续卷积的兴起\n连续卷积因其处理不规则采样数据和建模长期依赖关系的能力而备受关注[1]。随着大型卷积核在实验中展现出"
},
{
"path": "module-info/CVPR2024-DCMPNet.md",
"chars": 1457,
"preview": "# LEGM和MFM模块详细总结 https://arxiv.org/pdf/2403.01105\n\n## LEGM模块 (Local Feature-embedded Global Feature Extraction Module)\n\n"
},
{
"path": "module-info/CVPR2024-FADC.md",
"chars": 1580,
"preview": "### **FADC模块总结** https://arxiv.org/pdf/2403.05369\n\n#### **1. 背景**\n膨胀卷积(Dilated Convolution)通过插入间隔增加感受野,广泛应用于语义分割和目标检测任务。"
},
{
"path": "module-info/CVPR2024-PKINet.md",
"chars": 1318,
"preview": "### **PKI Module总结** https://openaccess.thecvf.com/content/CVPR2024/papers/Cai_Poly_Kernel_Inception_Network_for_Remote_"
},
{
"path": "module-info/CVPR2024-ParameterNet.md",
"chars": 1631,
"preview": "# DynamicConv模块总结 https://arxiv.org/pdf/2306.14525v2\n\n## 1. 背景\n\n### 问题背景\n在大规模视觉预训练中,研究者发现了\"低FLOPs陷阱\"现象:低FLOPs模型无法从大规模预训练"
},
{
"path": "module-info/CVPR2024-RMT.md",
"chars": 1980,
"preview": "# RMT Block模块详细分析 https://arxiv.org/pdf/2309.11523\n\n## 1. 背景\n\n### Vision Transformer的局限性\n传统的Vision Transformer (ViT)存在两个"
},
{
"path": "module-info/CVPR2024-RepVIT.md",
"chars": 1390,
"preview": "### RepViT Block模块总结 https://arxiv.org/pdf/2307.09283\n\n#### 1. 背景\n\n**原始问题**:\n- MobileNetV3采用的是传统的倒残差瓶颈结构,其中Token Mixer(空"
},
{
"path": "module-info/CVPR2024-Rewrite the Stars.md",
"chars": 1909,
"preview": "# StarBlocks模块总结 https://arxiv.org/pdf/2403.19967\n\n## 1. 背景\n\n### 传统网络设计的局限性\n在深度学习发展历程中,大多数网络都基于**线性投影(卷积和线性层)与非线性激活函数的组合"
},
{
"path": "module-info/CVPR2024-SFSConv.md",
"chars": 2919,
"preview": "# SFS-Conv模块详细总结 https://openaccess.thecvf.com/content/CVPR2024/papers/Li_Unleashing_Channel_Potential_Space-Frequency_S"
},
{
"path": "module-info/CVPR2024-TransNext.md",
"chars": 2470,
"preview": "# TransNeXt核心模块详解 https://arxiv.org/pdf/2311.17132\n\n## 一、Aggregated Attention(聚合注意力)\n\n### 1. 背景\n\n#### 现有问题\n- **深度退化效应**:"
},
{
"path": "module-info/CVPR2024-UniRepLKNet.md",
"chars": 1497,
"preview": "# Dilated Reparam Block 模块总结 https://arxiv.org/pdf/2311.15599\n\n## 1. 背景\n\n### 传统大核设计的局限性\n在UniRepLKNet之前,已有研究表明大核卷积应该与并行的小"
},
{
"path": "module-info/CVPR2025-BHViT.md",
"chars": 1690,
"preview": "# BHViT: 二值化混合视觉Transformer论文总结 https://arxiv.org/pdf/2503.02394\n\n## 核心思想与主要贡献\n\n本文提出了BHViT(Binarized Hybrid Vision Trans"
},
{
"path": "module-info/CVPR2025-DarkIR.md",
"chars": 1593,
"preview": "# DarkIR中EBlock和DBlock模块详细分析 https://arxiv.org/pdf/2412.13443\n\n## EBlock (编码器块) - 低光增强编码器\n\n### 1. 背景\n在低光条件下,图像主要面临照明不足的问"
},
{
"path": "module-info/CVPR2025-EVSSM.md",
"chars": 1524,
"preview": "# EVS和EDFFN模块详细分析 https://arxiv.org/pdf/2405.14343\n\n## EVS(高效视觉扫描)模块\n\n### 1. 背景\n传统的状态空间模型(如Mamba)是为处理一维序列数据而设计的,直接应用到视觉任"
},
{
"path": "module-info/CVPR2025-EfficientViM.md",
"chars": 2208,
"preview": "# EfficientViM模块详细分析 https://arxiv.org/pdf/2411.15241\n\n## 1. 背景\n\n### 现有技术挑战\n- **传统CNN局限性**:卷积神经网络虽然在局部特征提取上表现良好,但在捕获全局依赖"
},
{
"path": "module-info/CVPR2025-FDConv.md",
"chars": 1340,
"preview": "# FDConv模块详细总结 https://arxiv.org/pdf/2503.18783\n\n## 1. 背景\n\n### 传统动态卷积的发展与局限\n- **动态卷积(DY-Conv)** 通过使用多个并行权重结合注意力机制,实现了样本特"
},
{
"path": "module-info/CVPR2025-GroupMamba.md",
"chars": 2192,
"preview": "# GroupMamba Layer模块详细总结 https://arxiv.org/pdf/2407.13772\n\n## 1. 背景\n\n### 现有问题\n传统的Mamba模型在计算机视觉任务中面临几个关键挑战:\n\n**稳定性问题**:\n-"
},
{
"path": "module-info/CVPR2025-LSNet.md",
"chars": 1893,
"preview": "# LSNet中的LS Block模块总结 https://arxiv.org/pdf/2503.23135\n\n## 1. 背景\n\n### 传统轻量级网络的局限性\n现有轻量级视觉网络主要依赖两种token混合方式:\n- **自注意力机制**"
},
{
"path": "module-info/CVPR2025-MambaIRV2.md",
"chars": 1879,
"preview": "# Attentive State Space Group (ASSG) 模块总结 https://arxiv.org/pdf/2411.15269\n\n## 1. 背景\n\n### 问题背景\n传统Mamba架构在图像修复任务中面临的核心挑战:"
},
{
"path": "module-info/CVPR2025-MambaOut.md",
"chars": 1577,
"preview": "# Gated CNN Block 模块总结 https://arxiv.org/pdf/2405.07992\n\n## 1. 背景\n\n### 历史发展背景\nGated CNN block最初由Dauphin等人在2017年提出,用于语言建模"
},
{
"path": "module-info/CVPR2025-MambaVision.md",
"chars": 2030,
"preview": "# MambaVision Mixer模块总结 https://arxiv.org/pdf/2407.08083\n\n## 1. 背景\n\n### 原始Mamba在视觉任务中的局限性\n传统Mamba架构虽然在自然语言处理任务中表现出色,但在计算"
},
{
"path": "module-info/CVPR2025-MobileMamba.md",
"chars": 2524,
"preview": "# MobileMamba模块详细分析 https://arxiv.org/pdf/2411.15941\n\n## 1. 背景\n\n### 现有方法的局限性\n- **CNN模型局限**:基于CNN的轻量级模型(如MobileNets)主要使用局"
},
{
"path": "module-info/CVPR2025-Mona.md",
"chars": 1875,
"preview": "# Mona模块详细分析 https://arxiv.org/pdf/2408.08345\n\n## 1. 背景\n\n### 传统适配器的局限性\n- **来源局限**:现有的计算机视觉适配器设计主要沿用NLP领域的线性适配器结构,使用线性滤波器"
},
{
"path": "module-info/CVPR2025-OverLoCK.md",
"chars": 1892,
"preview": "# OverLoCK网络模块详解 https://arxiv.org/pdf/2502.20087\n\n## 1. BasicBlock模块\n\n### 背景\nBasicBlock是OverLoCK网络中Base-Net和Overview-Ne"
},
{
"path": "module-info/CVPR2025-SCSegamba.md",
"chars": 2178,
"preview": "# SAVSS模块详细总结 https://arxiv.org/pdf/2503.01113\n\n## 1. 背景\n\n### 现有方法的局限性\n当前裂缝分割方法面临的主要挑战包括[1][2][3]:\n\n**CNN方法的限制**:\n- CNN如"
},
{
"path": "module-info/CVPR2025-Transformers without Normalization.md",
"chars": 1798,
"preview": "# DyT (Dynamic Tanh) 模块详细总结 https://arxiv.org/pdf/2503.10622\n\n## 1. 背景\n\n### 归一化层的普遍性与重要性\n- **历史地位**: 自2015年Batch Normali"
},
{
"path": "module-info/CVPR2025-vHeat.md",
"chars": 1776,
"preview": "# vHeat模块总结 https://arxiv.org/pdf/2405.16555\n\n## 1. 背景\n\n### 现有视觉模型的局限性\n- **CNN的限制**:卷积神经网络依赖局部感受野和固定卷积算子,在捕获长程和复杂依赖关系方面存"
},
{
"path": "module-info/ICLR2025-Pola.md",
"chars": 1759,
"preview": "# PolaFormer中的Pola模块总结 https://arxiv.org/pdf/2501.15061\n\n## 1. 背景\n\n### 传统线性注意力的局限性\n传统的Transformer自注意力机制具有O(N²)的二次复杂度,在处理"
},
{
"path": "module-info/ICLR2025-ToST.md",
"chars": 2553,
"preview": "# Token Statistics Self-Attention (TSSA) 模块总结 https://arxiv.org/pdf/2412.17810\n\n## 1. 背景\n\n### 传统注意力机制的挑战\n传统Transformer的自"
},
{
"path": "module-info/TPAMI2025-HyperYOLO.md",
"chars": 2318,
"preview": "# Mixed Aggregation Network (MANet) 模块总结\n\n## 1. 背景\n传统YOLO系列方法的骨干网络主要依赖单一的基础模块进行特征提取,如YOLOv8中的C2f模块。这种单一结构限制了信息流的多样性和特征提取"
},
{
"path": "mutilmodel-project.md",
"chars": 3453,
"preview": "# 2025-YOLO|RTDETR多模态目标检测项目\n对于当今的视觉任务来说,最简单入手的便是YOLO系列,通过ultralytics库的帮助下,无论是否来自计算机科班的同学基本都可以快速构建自己的目标检测模型。但是与简单方便相伴而来的是"
},
{
"path": "objectdetection-tricks/readme.md",
"chars": 2339,
"preview": "# objectdetection-tricks\n这个项目主要是提供一些关于目标检测的tricks.\n\n# Explanation\n- **tricks_1** \n 可视化并统计目标检测中的TP,FP,FN. \n 视频教学地"
},
{
"path": "objectdetection-tricks/tricks_1.py",
"chars": 4398,
"preview": "import os, cv2, tqdm, shutil\nimport numpy as np\n\ndef xywh2xyxy(box):\n box[:, 0] = box[:, 0] - box[:, 2] / 2\n box[:"
},
{
"path": "objectdetection-tricks/tricks_10.py",
"chars": 1010,
"preview": "import torch, thop\nfrom thop import profile\nfrom ultralytics import YOLO, RTDETR\nfrom prettytable import PrettyTable\n\nif"
},
{
"path": "objectdetection-tricks/tricks_11.py",
"chars": 651,
"preview": "import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\n\nif __name__ == '__main__':\n file_list = ['a/f"
},
{
"path": "objectdetection-tricks/tricks_12.py",
"chars": 2926,
"preview": "import pandas as pd\nimport numpy as np\nimport matplotlib.pylab as plt\n\ndef deal_yolov7_result(data_path):\n with open("
},
{
"path": "objectdetection-tricks/tricks_13.py",
"chars": 722,
"preview": "if type(x) in {list, tuple}:\n if idx == (len(self.model) - 1):\n if type(x[1]) is dict:\n print(f'lay"
},
{
"path": "objectdetection-tricks/tricks_14.py",
"chars": 4498,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nimport os\nimport numpy as np\nfrom prettytable import PrettyTable\nfrom "
},
{
"path": "objectdetection-tricks/tricks_15.py",
"chars": 7764,
"preview": "import os, glob, cv2, tqdm\nfrom prettytable import PrettyTable\n\nRED, GREEN, BLUE, YELLOW, ORANGE, RESET = \"\\033[91m\", \"\\"
},
{
"path": "objectdetection-tricks/tricks_16.py",
"chars": 4929,
"preview": "import json, tqdm, cv2, shutil, os\nimport numpy as np\nimport matplotlib.pyplot as plt\n\n# 1. 标签文件类别有问题,例如类别从1开始,不是从0开始。\n#"
},
{
"path": "objectdetection-tricks/tricks_2.py",
"chars": 12371,
"preview": "import torch, time, math, thop, tqdm, torchvision\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.nn.mo"
},
{
"path": "objectdetection-tricks/tricks_3.py",
"chars": 1551,
"preview": "def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):\n \"\"\"\n x: Fe"
},
{
"path": "objectdetection-tricks/tricks_4.py",
"chars": 3332,
"preview": "import os\nimport cv2\nimport json\nfrom tqdm import tqdm\nfrom sklearn.model_selection import train_test_split\nimport argpa"
},
{
"path": "objectdetection-tricks/tricks_5.py",
"chars": 4993,
"preview": "import cv2\nimport numpy as np\nimport matplotlib.pylab as plt\nfrom segment_anything import SamPredictor, sam_model_regist"
},
{
"path": "objectdetection-tricks/tricks_6.py",
"chars": 1121,
"preview": "import pkg_resources as pkg\ndef check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=Fals"
},
{
"path": "objectdetection-tricks/tricks_7.py",
"chars": 2844,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nimport argparse\nimport logging\nimport math\nimport os\nimport random\nimp"
},
{
"path": "objectdetection-tricks/tricks_8.py",
"chars": 3025,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nimport argparse\nimport logging\nimport math\nimport os\nimport random\nimp"
},
{
"path": "objectdetection-tricks/tricks_9.py",
"chars": 15371,
"preview": "import torch, time, math, thop, tqdm, torchvision\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom prettytable"
},
{
"path": "readme.md",
"chars": 11731,
"preview": "# Object Detection Script\n这个项目主要是提供一些关于目标检测的代码和改进思路参考.\n\n### [BiliBili视频指南](https://github.com/z1069614715/objectdetectio"
},
{
"path": "visdrone2019-benchmark/readme.md",
"chars": 5729,
"preview": "# VisDrone2019 Testset Benchmark\n### Visdrone2019 测试集(1610张图) COCO指标 (有需要使用对比实验数据的同学可以直接用)\n### Jetson Orin Nano 4G Tenso"
},
{
"path": "yolo/data.yaml",
"chars": 149,
"preview": "# dataset path\ntrain: ./dataset/images/train\nval: ./dataset/images/val\ntest: ./dataset/images/test\n\n# number of classes\n"
},
{
"path": "yolo/dataset/VOCdevkit/Annotations/ReadMe.md",
"chars": 15,
"preview": "# 存放VOC标注格式的文件夹"
},
{
"path": "yolo/dataset/VOCdevkit/JPEGImages/ReadMe.md",
"chars": 10,
"preview": "# 存放图像的文件夹"
},
{
"path": "yolo/dataset/VOCdevkit/txt/ReadMe.md",
"chars": 16,
"preview": "# 存放YOLO标注格式的文件夹"
},
{
"path": "yolo/dataset/split_data.py",
"chars": 1450,
"preview": "import os, shutil, random\nrandom.seed(0)\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\n\nval_si"
},
{
"path": "yolo/dataset/xml2txt.py",
"chars": 2388,
"preview": "import xml.etree.ElementTree as ET\nimport os, cv2\nimport numpy as np\nfrom os import listdir\nfrom os.path import join\n\ncl"
},
{
"path": "yolo/readme.md",
"chars": 1142,
"preview": "# YOLOV5,YOLOV7,YOLOV8的数据集处理文件\n本目录下的脚本是针对与yolov5,v7,v8的数据集处理脚本,支持如下:\n1. VOC标注格式转换为YOLO标注格式。\n2. 对数据集进行划分训练集,验证集,测试集。\n\n# V"
},
{
"path": "yolo-gradcam/README.md",
"chars": 656,
"preview": "# yolo-gradcam\nyolo model with gradcam visual. \n即插即用,不需要对源码进行任何修改!\n\n## 哔哩哔哩视频教学地址\n1. yolov5-[哔哩哔哩地址](https://www.bilibi"
},
{
"path": "yolo-gradcam/yolov11_heatmap.py",
"chars": 17143,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nwarnings.simplefilter('ignore')\nimport torch, yaml, cv2, os, shutil, s"
},
{
"path": "yolo-gradcam/yolov5_heatmap.py",
"chars": 8100,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nwarnings.simplefilter('ignore')\nimport torch, yaml, cv2, os, shutil\nim"
},
{
"path": "yolo-gradcam/yolov7_heatmap.py",
"chars": 8319,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nwarnings.simplefilter('ignore')\nimport torch, yaml, cv2, os, shutil\nim"
},
{
"path": "yolo-gradcam/yolov8_heatmap.py",
"chars": 17143,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nwarnings.simplefilter('ignore')\nimport torch, yaml, cv2, os, shutil, s"
},
{
"path": "yolo-gradcam/yolov9_heatmap.py",
"chars": 8125,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nwarnings.simplefilter('ignore')\nimport torch, yaml, cv2, os, shutil\nim"
},
{
"path": "yolo-improve/CAM.py",
"chars": 2755,
"preview": "class CAM(nn.Module):\n def __init__(self, inc, fusion='weight'):\n super().__init__()\n \n assert f"
},
{
"path": "yolo-improve/iou.py",
"chars": 7051,
"preview": "import numpy as np\nimport torch, math\n\nclass WIoU_Scale:\n ''' monotonous: {\n None: origin v1\n T"
},
{
"path": "yolo-improve/paper.md",
"chars": 2078,
"preview": "# 基于YOLO和RT-DETR的论文全流程指导项目<此项目全程由E导主导>\n\n### 1. 入手此项目后如果还需要一对一的服务享受会员优惠,此一对一为E导主导\n\n1. 实验方面讲解 268/h (会员248/h) --(拒绝废话纯干货直击"
},
{
"path": "yolo-improve/readme.md",
"chars": 19060,
"preview": "# YOLO-Improve\n这个项目主要是提供一些关于yolo系列模型的改进思路,效果因数据集和参数而异,仅作参考。 \n\n\n# Explanation\n- **iou** \n 添加EIOU,SIOU,ALPHA-IOU, Foc"
},
{
"path": "yolo-improve/rtdetr-compress.md",
"chars": 2933,
"preview": "# RTDETR剪枝项目介绍\n\n## 对于群里的剪枝相关问题,我基本都会回复,对于一些剪枝问题,我都会给出建议。 \n\n### 首先剪枝是什么? \n模型剪枝是深度学习中的一种技术,旨在通过减少神经网络中不必要的参数和连接,来优化模型的效率"
},
{
"path": "yolo-improve/rtdetr-distill.md",
"chars": 3684,
"preview": "# RTDETR蒸馏项目介绍\n\n### 首先蒸馏是什么? \n模型蒸馏(Model Distillation)是一种用于在计算机视觉中提高模型性能和效率的技术。在模型蒸馏中,通常存在两个模型,即“教师模型”和“学生模型”。\n\n### 为什么"
},
{
"path": "yolo-improve/rtdetr-project.md",
"chars": 116047,
"preview": "# [基于Ultralytics的RT-DETR改进详细介绍](https://github.com/z1069614715/objectdetection_script)\n\n# 目前自带的一些改进方案(目前拥有合计320+个改进点!持续更"
},
{
"path": "yolo-improve/ultralytics-yolo/get_COCO_metrice.py",
"chars": 1125,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nimport argparse\nfrom pycocotools.coco import COCO\nfrom pycocotools.coc"
},
{
"path": "yolo-improve/ultralytics-yolo/heatmap.py",
"chars": 21362,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nwarnings.simplefilter('ignore')\nimport torch, yaml, cv2, os, shutil, s"
},
{
"path": "yolo-improve/ultralytics-yolo/requirements.txt",
"chars": 79,
"preview": "PyYAML\ntensorboard\nscipy\nthop\ntransformers\neinops\nprettytable\nPyWavelets\npolars"
},
{
"path": "yolo-improve/ultralytics-yolo/train.py",
"chars": 1722,
"preview": "import warnings, os, sys\nsys.path.append(os.path.dirname(os.path.abspath(__file__)))\nwarnings.filterwarnings('ignore')\nf"
},
{
"path": "yolo-improve/ultralytics-yolo/val.py",
"chars": 4668,
"preview": "import warnings\nwarnings.filterwarnings('ignore')\nimport os\nimport numpy as np\nfrom prettytable import PrettyTable\nfrom "
},
{
"path": "yolo-improve/ultralytics-yolo/yolo2coco.py",
"chars": 6119,
"preview": "import json\nimport os\nfrom pathlib import Path\nfrom PIL import Image\n\n\nclass YOLOtoCOCO:\n def __init__(self, yolo_dir"
},
{
"path": "yolo-improve/yolov11-project.md",
"chars": 87013,
"preview": "# [基于Ultralytics的YOLO11|YOLO12改进项目.(69.9¥)](https://github.com/z1069614715/objectdetection_script)\n#### 因为YOLO11和YOLO12的"
},
{
"path": "yolo-improve/yolov5-AIFI.py",
"chars": 5403,
"preview": "import torch\nimport torch.nn as nn\n\nclass TransformerEncoderLayer(nn.Module):\n \"\"\"Defines a single layer of the trans"
},
{
"path": "yolo-improve/yolov5-AUX/benchmarks.py",
"chars": 7823,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nRun YOLOv5 benchmarks on all supported export formats\n\nFormat "
},
{
"path": "yolo-improve/yolov5-AUX/data/Argoverse.yaml",
"chars": 2710,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Argoverse-HD dataset (ring-front-center camera) http://www.cs.cmu.edu/~meng"
},
{
"path": "yolo-improve/yolov5-AUX/data/GlobalWheat2020.yaml",
"chars": 1862,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Global Wheat 2020 dataset http://www.global-wheat.com/ by University of Sas"
},
{
"path": "yolo-improve/yolov5-AUX/data/ImageNet.yaml",
"chars": 18848,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# ImageNet-1k dataset https://www.image-net.org/index.php by Stanford Univers"
},
{
"path": "yolo-improve/yolov5-AUX/data/Objects365.yaml",
"chars": 9182,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Objects365 dataset https://www.objects365.org/ by Megvii\n# Example usage: p"
},
{
"path": "yolo-improve/yolov5-AUX/data/SKU-110K.yaml",
"chars": 2318,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# SKU-110K retail items dataset https://github.com/eg4000/SKU110K_CVPR19 by T"
},
{
"path": "yolo-improve/yolov5-AUX/data/VOC.yaml",
"chars": 3470,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of "
},
{
"path": "yolo-improve/yolov5-AUX/data/VisDrone.yaml",
"chars": 2948,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# VisDrone2019-DET dataset https://github.com/VisDrone/VisDrone-Dataset by Ti"
},
{
"path": "yolo-improve/yolov5-AUX/data/coco.yaml",
"chars": 2472,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# COCO 2017 dataset http://cocodataset.org by Microsoft\n# Example usage: pyth"
},
{
"path": "yolo-improve/yolov5-AUX/data/coco128-seg.yaml",
"chars": 1845,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# COCO128-seg dataset https://www.kaggle.com/ultralytics/coco128 (first 128 i"
},
{
"path": "yolo-improve/yolov5-AUX/data/coco128.yaml",
"chars": 1829,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# COCO128 dataset https://www.kaggle.com/ultralytics/coco128 (first 128 image"
},
{
"path": "yolo-improve/yolov5-AUX/data/hyps/hyp.Objects365.yaml",
"chars": 670,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Hyperparameters for Objects365 training\n# python train.py --weights yolov5m"
},
{
"path": "yolo-improve/yolov5-AUX/data/hyps/hyp.VOC.yaml",
"chars": 1153,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Hyperparameters for VOC training\n# python train.py --batch 128 --weights yo"
},
{
"path": "yolo-improve/yolov5-AUX/data/hyps/hyp.no-augmentation.yaml",
"chars": 1681,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Hyperparameters when using Albumentations frameworks\n# python train.py --hy"
},
{
"path": "yolo-improve/yolov5-AUX/data/hyps/hyp.scratch-high.yaml",
"chars": 1680,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Hyperparameters for high-augmentation COCO training from scratch\n# python t"
},
{
"path": "yolo-improve/yolov5-AUX/data/hyps/hyp.scratch-low.yaml",
"chars": 1688,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Hyperparameters for low-augmentation COCO training from scratch\n# python tr"
},
{
"path": "yolo-improve/yolov5-AUX/data/hyps/hyp.scratch-med.yaml",
"chars": 1682,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Hyperparameters for medium-augmentation COCO training from scratch\n# python"
},
{
"path": "yolo-improve/yolov5-AUX/data/scripts/download_weights.sh",
"chars": 611,
"preview": "#!/bin/bash\n# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Download latest models from https://github.com/ultralytics/yolo"
},
{
"path": "yolo-improve/yolov5-AUX/data/scripts/get_coco.sh",
"chars": 1543,
"preview": "#!/bin/bash\n# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Download COCO 2017 dataset http://cocodataset.org\n# Example usa"
},
{
"path": "yolo-improve/yolov5-AUX/data/scripts/get_coco128.sh",
"chars": 595,
"preview": "#!/bin/bash\n# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Download COCO128 dataset https://www.kaggle.com/ultralytics/coc"
},
{
"path": "yolo-improve/yolov5-AUX/data/scripts/get_imagenet.sh",
"chars": 1648,
"preview": "#!/bin/bash\n# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Download ILSVRC2012 ImageNet dataset https://image-net.org\n# Ex"
},
{
"path": "yolo-improve/yolov5-AUX/data/xView.yaml",
"chars": 5147,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# DIUx xView 2018 Challenge https://challenge.xviewdataset.org by U.S. Nation"
},
{
"path": "yolo-improve/yolov5-AUX/detect.py",
"chars": 14289,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nRun YOLOv5 detection inference on images, videos, directories, globs, You"
},
{
"path": "yolo-improve/yolov5-AUX/export.py",
"chars": 32024,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nExport a YOLOv5 PyTorch model to other formats. TensorFlow exports author"
},
{
"path": "yolo-improve/yolov5-AUX/hubconf.py",
"chars": 7700,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nPyTorch Hub models https://pytorch.org/hub/ultralytics_yolov5\n\nUsage:\n "
},
{
"path": "yolo-improve/yolov5-AUX/models/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "yolo-improve/yolov5-AUX/models/common.py",
"chars": 41623,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nCommon modules\n\"\"\"\n\nimport ast\nimport contextlib\nimport json\nimport math\n"
},
{
"path": "yolo-improve/yolov5-AUX/models/experimental.py",
"chars": 4316,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nExperimental modules\n\"\"\"\nimport math\n\nimport numpy as np\nimport torch\nimp"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/anchors.yaml",
"chars": 3332,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n# Default anchors for COCO data\n\n\n# P5 --------------------------------------"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov3-spp.yaml",
"chars": 1564,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov3-tiny.yaml",
"chars": 1229,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov3.yaml",
"chars": 1555,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5-bifpn.yaml",
"chars": 1420,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5-fpn.yaml",
"chars": 1211,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5-p2.yaml",
"chars": 1684,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5-p34.yaml",
"chars": 1346,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5-p6.yaml",
"chars": 1738,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5-p7.yaml",
"chars": 2119,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5-panet.yaml",
"chars": 1404,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5l6.yaml",
"chars": 1817,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5m6.yaml",
"chars": 1819,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.67 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5n6.yaml",
"chars": 1819,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5s-LeakyReLU.yaml",
"chars": 1494,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\nactivation: nn.LeakyReLU(0.1) # <-"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5s-ghost.yaml",
"chars": 1480,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5s-transformer.yaml",
"chars": 1438,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5s6.yaml",
"chars": 1819,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/hub/yolov5x6.yaml",
"chars": 1819,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/segment/yolov5l-seg.yaml",
"chars": 1408,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/segment/yolov5m-seg.yaml",
"chars": 1410,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.67 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/segment/yolov5n-seg.yaml",
"chars": 1410,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/segment/yolov5s-seg.yaml",
"chars": 1409,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/segment/yolov5x-seg.yaml",
"chars": 1410,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/tf.py",
"chars": 27015,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nTensorFlow, Keras and TFLite versions of YOLOv5\nAuthored by https://githu"
},
{
"path": "yolo-improve/yolov5-AUX/models/yolo.py",
"chars": 18133,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nYOLO-specific modules\n\nUsage:\n $ python models/yolo.py --cfg yolov5s.y"
},
{
"path": "yolo-improve/yolov5-AUX/models/yolov5_aux.yaml",
"chars": 1521,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/yolov5l.yaml",
"chars": 1398,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.0 # model depth "
},
{
"path": "yolo-improve/yolov5-AUX/models/yolov5m.yaml",
"chars": 1400,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.67 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/yolov5n.yaml",
"chars": 1400,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/yolov5s.yaml",
"chars": 1400,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 0.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/models/yolov5x.yaml",
"chars": 1400,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\n# Parameters\nnc: 80 # number of classes\ndepth_multiple: 1.33 # model depth"
},
{
"path": "yolo-improve/yolov5-AUX/train.py",
"chars": 33826,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nTrain a YOLOv5 model on a custom dataset.\nModels and datasets download au"
},
{
"path": "yolo-improve/yolov5-AUX/utils/__init__.py",
"chars": 2274,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nutils/initialization\n\"\"\"\n\nimport contextlib\nimport platform\nimport thread"
},
{
"path": "yolo-improve/yolov5-AUX/utils/activations.py",
"chars": 3446,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nActivation functions\n\"\"\"\n\nimport torch\nimport torch.nn as nn\nimport torch"
},
{
"path": "yolo-improve/yolov5-AUX/utils/augmentations.py",
"chars": 17023,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nImage augmentation functions\n\"\"\"\n\nimport math\nimport random\n\nimport cv2\ni"
},
{
"path": "yolo-improve/yolov5-AUX/utils/autoanchor.py",
"chars": 7397,
"preview": "# YOLOv5 🚀 by Ultralytics, GPL-3.0 license\n\"\"\"\nAutoAnchor utils\n\"\"\"\n\nimport random\n\nimport numpy as np\nimport torch\nimpo"
}
]
// ... and 151 more files (download for full content)
About this extraction
This page contains the full source code of the z1069614715/objectdetection_script GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 351 files (2.2 MB), approximately 600.9k tokens, and a symbol index with 2361 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.