main 6e3fdda39508 cached
463 files
12.3 MB
3.3M tokens
6238 symbols
1 requests
Copy disabled (too large) Download .txt
Showing preview only (13,016K chars total). Download the full file to get everything.
Repository: huggingface/pytorch-image-models
Branch: main
Commit: 6e3fdda39508
Files: 463
Total size: 12.3 MB

Directory structure:
gitextract_0n5u8q1i/

├── .gitattributes
├── .github/
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── config.yml
│   │   └── feature_request.md
│   └── workflows/
│       ├── build_documentation.yml
│       ├── build_pr_documentation.yml
│       ├── tests.yml
│       ├── trufflehog.yml
│       └── upload_pr_documentation.yml
├── .gitignore
├── CITATION.cff
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── UPGRADING.md
├── avg_checkpoints.py
├── benchmark.py
├── bulk_runner.py
├── clean_checkpoint.py
├── convert/
│   ├── convert_from_mxnet.py
│   └── convert_nest_flax.py
├── distributed_train.sh
├── hfdocs/
│   ├── README.md
│   └── source/
│       ├── _toctree.yml
│       ├── changes.mdx
│       ├── feature_extraction.mdx
│       ├── hf_hub.mdx
│       ├── hparams.mdx
│       ├── index.mdx
│       ├── installation.mdx
│       ├── models/
│       │   ├── adversarial-inception-v3.mdx
│       │   ├── advprop.mdx
│       │   ├── big-transfer.mdx
│       │   ├── csp-darknet.mdx
│       │   ├── csp-resnet.mdx
│       │   ├── csp-resnext.mdx
│       │   ├── densenet.mdx
│       │   ├── dla.mdx
│       │   ├── dpn.mdx
│       │   ├── ecaresnet.mdx
│       │   ├── efficientnet-pruned.mdx
│       │   ├── efficientnet.mdx
│       │   ├── ensemble-adversarial.mdx
│       │   ├── ese-vovnet.mdx
│       │   ├── fbnet.mdx
│       │   ├── gloun-inception-v3.mdx
│       │   ├── gloun-resnet.mdx
│       │   ├── gloun-resnext.mdx
│       │   ├── gloun-senet.mdx
│       │   ├── gloun-seresnext.mdx
│       │   ├── gloun-xception.mdx
│       │   ├── hrnet.mdx
│       │   ├── ig-resnext.mdx
│       │   ├── inception-resnet-v2.mdx
│       │   ├── inception-v3.mdx
│       │   ├── inception-v4.mdx
│       │   ├── legacy-se-resnet.mdx
│       │   ├── legacy-se-resnext.mdx
│       │   ├── legacy-senet.mdx
│       │   ├── mixnet.mdx
│       │   ├── mnasnet.mdx
│       │   ├── mobilenet-v2.mdx
│       │   ├── mobilenet-v3.mdx
│       │   ├── nasnet.mdx
│       │   ├── noisy-student.mdx
│       │   ├── pnasnet.mdx
│       │   ├── regnetx.mdx
│       │   ├── regnety.mdx
│       │   ├── res2net.mdx
│       │   ├── res2next.mdx
│       │   ├── resnest.mdx
│       │   ├── resnet-d.mdx
│       │   ├── resnet.mdx
│       │   ├── resnext.mdx
│       │   ├── rexnet.mdx
│       │   ├── se-resnet.mdx
│       │   ├── selecsls.mdx
│       │   ├── seresnext.mdx
│       │   ├── skresnet.mdx
│       │   ├── skresnext.mdx
│       │   ├── spnasnet.mdx
│       │   ├── ssl-resnet.mdx
│       │   ├── swsl-resnet.mdx
│       │   ├── swsl-resnext.mdx
│       │   ├── tf-efficientnet-condconv.mdx
│       │   ├── tf-efficientnet-lite.mdx
│       │   ├── tf-efficientnet.mdx
│       │   ├── tf-inception-v3.mdx
│       │   ├── tf-mixnet.mdx
│       │   ├── tf-mobilenet-v3.mdx
│       │   ├── tresnet.mdx
│       │   ├── wide-resnet.mdx
│       │   └── xception.mdx
│       ├── models.mdx
│       ├── quickstart.mdx
│       ├── reference/
│       │   ├── data.mdx
│       │   ├── models.mdx
│       │   ├── optimizers.mdx
│       │   └── schedulers.mdx
│       ├── results.mdx
│       └── training_script.mdx
├── hubconf.py
├── inference.py
├── onnx_export.py
├── onnx_validate.py
├── pyproject.toml
├── requirements-dev.txt
├── requirements.txt
├── results/
│   ├── README.md
│   ├── benchmark-infer-amp-nchw-pt113-cu117-rtx3090.csv
│   ├── benchmark-infer-amp-nchw-pt210-cu121-rtx3090.csv
│   ├── benchmark-infer-amp-nchw-pt240-cu124-rtx3090.csv
│   ├── benchmark-infer-amp-nchw-pt240-cu124-rtx4090-dynamo.csv
│   ├── benchmark-infer-amp-nchw-pt240-cu124-rtx4090.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu128-4090-dynamo.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu128-4090.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu130-5090-dynamo.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu130-5090.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu130-pro6000maxq-dynamo.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu130-pro6000maxq.csv
│   ├── benchmark-infer-amp-nhwc-pt113-cu117-rtx3090.csv
│   ├── benchmark-infer-amp-nhwc-pt210-cu121-rtx3090.csv
│   ├── benchmark-infer-amp-nhwc-pt240-cu124-rtx3090.csv
│   ├── benchmark-infer-amp-nhwc-pt240-cu124-rtx4090.csv
│   ├── benchmark-infer-amp_bf16-nchw-pt291-cu130-pro6000maxq-dynamo.csv
│   ├── benchmark-infer-bf16-nchw-pt291-cu130-pro6000maxq-dynamo.csv
│   ├── benchmark-infer-fp32-nchw-pt221-cpu-i9_10940x-dynamo.csv
│   ├── benchmark-infer-fp32-nchw-pt240-cpu-i7_12700h-dynamo.csv
│   ├── benchmark-infer-fp32-nchw-pt240-cpu-i9_10940x-dynamo.csv
│   ├── benchmark-train-amp-nchw-pt112-cu113-rtx3090.csv
│   ├── benchmark-train-amp-nhwc-pt112-cu113-rtx3090.csv
│   ├── generate_csv_results.py
│   ├── model_metadata-in1k.csv
│   ├── results-imagenet-a-clean.csv
│   ├── results-imagenet-a.csv
│   ├── results-imagenet-r-clean.csv
│   ├── results-imagenet-r.csv
│   ├── results-imagenet-real.csv
│   ├── results-imagenet.csv
│   ├── results-imagenetv2-matched-frequency.csv
│   └── results-sketch.csv
├── setup.cfg
├── tests/
│   ├── __init__.py
│   ├── test_checkpoint_loading.py
│   ├── test_layers.py
│   ├── test_layers_drop.py
│   ├── test_layers_pool.py
│   ├── test_models.py
│   ├── test_optim.py
│   ├── test_scheduler.py
│   └── test_utils.py
├── timm/
│   ├── __init__.py
│   ├── data/
│   │   ├── __init__.py
│   │   ├── _info/
│   │   │   ├── imagenet12k_synsets.txt
│   │   │   ├── imagenet21k_goog_synsets.txt
│   │   │   ├── imagenet21k_goog_to_12k_indices.txt
│   │   │   ├── imagenet21k_goog_to_22k_indices.txt
│   │   │   ├── imagenet21k_miil_synsets.txt
│   │   │   ├── imagenet21k_miil_w21_synsets.txt
│   │   │   ├── imagenet22k_ms_synsets.txt
│   │   │   ├── imagenet22k_ms_to_12k_indices.txt
│   │   │   ├── imagenet22k_ms_to_22k_indices.txt
│   │   │   ├── imagenet22k_synsets.txt
│   │   │   ├── imagenet22k_to_12k_indices.txt
│   │   │   ├── imagenet_a_indices.txt
│   │   │   ├── imagenet_a_synsets.txt
│   │   │   ├── imagenet_r_indices.txt
│   │   │   ├── imagenet_r_synsets.txt
│   │   │   ├── imagenet_real_labels.json
│   │   │   ├── imagenet_synset_to_definition.txt
│   │   │   ├── imagenet_synset_to_lemma.txt
│   │   │   ├── imagenet_synsets.txt
│   │   │   ├── mini_imagenet_indices.txt
│   │   │   └── mini_imagenet_synsets.txt
│   │   ├── auto_augment.py
│   │   ├── config.py
│   │   ├── constants.py
│   │   ├── dataset.py
│   │   ├── dataset_factory.py
│   │   ├── dataset_info.py
│   │   ├── distributed_sampler.py
│   │   ├── imagenet_info.py
│   │   ├── loader.py
│   │   ├── mixup.py
│   │   ├── naflex_dataset.py
│   │   ├── naflex_loader.py
│   │   ├── naflex_mixup.py
│   │   ├── naflex_random_erasing.py
│   │   ├── naflex_transforms.py
│   │   ├── random_erasing.py
│   │   ├── readers/
│   │   │   ├── __init__.py
│   │   │   ├── class_map.py
│   │   │   ├── img_extensions.py
│   │   │   ├── reader.py
│   │   │   ├── reader_factory.py
│   │   │   ├── reader_hfds.py
│   │   │   ├── reader_hfids.py
│   │   │   ├── reader_image_folder.py
│   │   │   ├── reader_image_in_tar.py
│   │   │   ├── reader_image_tar.py
│   │   │   ├── reader_tfds.py
│   │   │   ├── reader_wds.py
│   │   │   └── shared_count.py
│   │   ├── real_labels.py
│   │   ├── tf_preprocessing.py
│   │   ├── transforms.py
│   │   └── transforms_factory.py
│   ├── layers/
│   │   ├── __init__.py
│   │   ├── _fx.py
│   │   ├── activations.py
│   │   ├── activations_me.py
│   │   ├── adaptive_avgmax_pool.py
│   │   ├── attention.py
│   │   ├── attention2d.py
│   │   ├── attention_pool.py
│   │   ├── attention_pool2d.py
│   │   ├── blur_pool.py
│   │   ├── bottleneck_attn.py
│   │   ├── cbam.py
│   │   ├── classifier.py
│   │   ├── cond_conv2d.py
│   │   ├── config.py
│   │   ├── conv2d_same.py
│   │   ├── conv_bn_act.py
│   │   ├── coord_attn.py
│   │   ├── create_act.py
│   │   ├── create_attn.py
│   │   ├── create_conv2d.py
│   │   ├── create_norm.py
│   │   ├── create_norm_act.py
│   │   ├── diff_attention.py
│   │   ├── drop.py
│   │   ├── eca.py
│   │   ├── evo_norm.py
│   │   ├── fast_norm.py
│   │   ├── filter_response_norm.py
│   │   ├── format.py
│   │   ├── gather_excite.py
│   │   ├── global_context.py
│   │   ├── grid.py
│   │   ├── grn.py
│   │   ├── halo_attn.py
│   │   ├── helpers.py
│   │   ├── hybrid_embed.py
│   │   ├── inplace_abn.py
│   │   ├── interpolate.py
│   │   ├── lambda_layer.py
│   │   ├── layer_scale.py
│   │   ├── linear.py
│   │   ├── median_pool.py
│   │   ├── mixed_conv2d.py
│   │   ├── ml_decoder.py
│   │   ├── mlp.py
│   │   ├── non_local_attn.py
│   │   ├── norm.py
│   │   ├── norm_act.py
│   │   ├── other_pool.py
│   │   ├── padding.py
│   │   ├── patch_dropout.py
│   │   ├── patch_embed.py
│   │   ├── pool1d.py
│   │   ├── pool2d_same.py
│   │   ├── pos_embed.py
│   │   ├── pos_embed_rel.py
│   │   ├── pos_embed_sincos.py
│   │   ├── selective_kernel.py
│   │   ├── separable_conv.py
│   │   ├── space_to_depth.py
│   │   ├── split_attn.py
│   │   ├── split_batchnorm.py
│   │   ├── squeeze_excite.py
│   │   ├── std_conv.py
│   │   ├── test_time_pool.py
│   │   ├── trace_utils.py
│   │   ├── typing.py
│   │   └── weight_init.py
│   ├── loss/
│   │   ├── __init__.py
│   │   ├── asymmetric_loss.py
│   │   ├── binary_cross_entropy.py
│   │   ├── cross_entropy.py
│   │   └── jsd.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── _builder.py
│   │   ├── _efficientnet_blocks.py
│   │   ├── _efficientnet_builder.py
│   │   ├── _factory.py
│   │   ├── _features.py
│   │   ├── _features_fx.py
│   │   ├── _helpers.py
│   │   ├── _hub.py
│   │   ├── _manipulate.py
│   │   ├── _pretrained.py
│   │   ├── _prune.py
│   │   ├── _pruned/
│   │   │   ├── ecaresnet101d_pruned.txt
│   │   │   ├── ecaresnet50d_pruned.txt
│   │   │   ├── efficientnet_b1_pruned.txt
│   │   │   ├── efficientnet_b2_pruned.txt
│   │   │   └── efficientnet_b3_pruned.txt
│   │   ├── _registry.py
│   │   ├── beit.py
│   │   ├── byoanet.py
│   │   ├── byobnet.py
│   │   ├── cait.py
│   │   ├── coat.py
│   │   ├── convit.py
│   │   ├── convmixer.py
│   │   ├── convnext.py
│   │   ├── crossvit.py
│   │   ├── csatv2.py
│   │   ├── cspnet.py
│   │   ├── davit.py
│   │   ├── deit.py
│   │   ├── densenet.py
│   │   ├── dla.py
│   │   ├── dpn.py
│   │   ├── edgenext.py
│   │   ├── efficientformer.py
│   │   ├── efficientformer_v2.py
│   │   ├── efficientnet.py
│   │   ├── efficientvit_mit.py
│   │   ├── efficientvit_msra.py
│   │   ├── eva.py
│   │   ├── factory.py
│   │   ├── fasternet.py
│   │   ├── fastvit.py
│   │   ├── features.py
│   │   ├── focalnet.py
│   │   ├── fx_features.py
│   │   ├── gcvit.py
│   │   ├── ghostnet.py
│   │   ├── hardcorenas.py
│   │   ├── helpers.py
│   │   ├── hgnet.py
│   │   ├── hiera.py
│   │   ├── hieradet_sam2.py
│   │   ├── hrnet.py
│   │   ├── hub.py
│   │   ├── inception_next.py
│   │   ├── inception_resnet_v2.py
│   │   ├── inception_v3.py
│   │   ├── inception_v4.py
│   │   ├── layers/
│   │   │   └── __init__.py
│   │   ├── levit.py
│   │   ├── mambaout.py
│   │   ├── maxxvit.py
│   │   ├── metaformer.py
│   │   ├── mlp_mixer.py
│   │   ├── mobilenetv3.py
│   │   ├── mobilenetv5.py
│   │   ├── mobilevit.py
│   │   ├── mvitv2.py
│   │   ├── naflexvit.py
│   │   ├── nasnet.py
│   │   ├── nest.py
│   │   ├── nextvit.py
│   │   ├── nfnet.py
│   │   ├── pit.py
│   │   ├── pnasnet.py
│   │   ├── pvt_v2.py
│   │   ├── rdnet.py
│   │   ├── registry.py
│   │   ├── regnet.py
│   │   ├── repghost.py
│   │   ├── repvit.py
│   │   ├── res2net.py
│   │   ├── resnest.py
│   │   ├── resnet.py
│   │   ├── resnetv2.py
│   │   ├── rexnet.py
│   │   ├── selecsls.py
│   │   ├── senet.py
│   │   ├── sequencer.py
│   │   ├── shvit.py
│   │   ├── sknet.py
│   │   ├── starnet.py
│   │   ├── swiftformer.py
│   │   ├── swin_transformer.py
│   │   ├── swin_transformer_v2.py
│   │   ├── swin_transformer_v2_cr.py
│   │   ├── tiny_vit.py
│   │   ├── tnt.py
│   │   ├── tresnet.py
│   │   ├── twins.py
│   │   ├── vgg.py
│   │   ├── visformer.py
│   │   ├── vision_transformer.py
│   │   ├── vision_transformer_hybrid.py
│   │   ├── vision_transformer_relpos.py
│   │   ├── vision_transformer_sam.py
│   │   ├── vitamin.py
│   │   ├── volo.py
│   │   ├── vovnet.py
│   │   ├── xception.py
│   │   ├── xception_aligned.py
│   │   └── xcit.py
│   ├── optim/
│   │   ├── __init__.py
│   │   ├── _optim_factory.py
│   │   ├── _param_groups.py
│   │   ├── _types.py
│   │   ├── adabelief.py
│   │   ├── adafactor.py
│   │   ├── adafactor_bv.py
│   │   ├── adahessian.py
│   │   ├── adamp.py
│   │   ├── adamw.py
│   │   ├── adan.py
│   │   ├── adopt.py
│   │   ├── kron.py
│   │   ├── lamb.py
│   │   ├── laprop.py
│   │   ├── lars.py
│   │   ├── lion.py
│   │   ├── lookahead.py
│   │   ├── madgrad.py
│   │   ├── mars.py
│   │   ├── muon.py
│   │   ├── nadam.py
│   │   ├── nadamw.py
│   │   ├── nvnovograd.py
│   │   ├── optim_factory.py
│   │   ├── radam.py
│   │   ├── rmsprop_tf.py
│   │   ├── sgdp.py
│   │   └── sgdw.py
│   ├── py.typed
│   ├── scheduler/
│   │   ├── __init__.py
│   │   ├── cosine_lr.py
│   │   ├── multistep_lr.py
│   │   ├── plateau_lr.py
│   │   ├── poly_lr.py
│   │   ├── scheduler.py
│   │   ├── scheduler_factory.py
│   │   ├── step_lr.py
│   │   └── tanh_lr.py
│   ├── task/
│   │   ├── __init__.py
│   │   ├── classification.py
│   │   ├── distillation.py
│   │   ├── task.py
│   │   └── token_distillation.py
│   ├── utils/
│   │   ├── __init__.py
│   │   ├── agc.py
│   │   ├── attention_extract.py
│   │   ├── checkpoint_saver.py
│   │   ├── clip_grad.py
│   │   ├── cuda.py
│   │   ├── decay_batch.py
│   │   ├── distributed.py
│   │   ├── jit.py
│   │   ├── log.py
│   │   ├── metrics.py
│   │   ├── misc.py
│   │   ├── model.py
│   │   ├── model_ema.py
│   │   ├── onnx.py
│   │   ├── random.py
│   │   └── summary.py
│   └── version.py
├── train.py
└── validate.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitattributes
================================================
*.ipynb linguist-documentation


================================================
FILE: .github/FUNDING.yml
================================================
# These are supported funding model platforms
github: rwightman


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a bug report to help us improve. Issues are for reporting bugs or requesting
  features, the discussion forum is available for asking questions or seeking help
  from the community.
title: "[BUG] Issue title..."
labels: bug
assignees: rwightman

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1.
2.

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
 - OS: [e.g. Windows 10, Ubuntu 18.04]
 - This repository version [e.g. pip 0.3.1 or commit ref]
 - PyTorch version w/ CUDA/cuDNN  [e.g. from `conda list`, 1.7.0  py3.8_cuda11.0.221_cudnn8.0.3_0]

**Additional context**
Add any other context about the problem here.


================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
contact_links:
  - name: Community Discussions
    url: https://github.com/rwightman/pytorch-image-models/discussions
    about: Hparam requests in issues will be ignored! Issues are for features and bugs. Questions can be asked in Discussions.


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project. Hparam requests, training help are not feature requests.
  The discussion forum is available for asking questions or seeking help from the community.
title: "[FEATURE] Feature title..."
labels: enhancement
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is.

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.


================================================
FILE: .github/workflows/build_documentation.yml
================================================
name: Build documentation

on:
  push:
    branches:
      - main
      - doc-builder*
      - v*-release

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
    with:
      commit_sha: ${{ github.sha }}
      package: pytorch-image-models
      package_name: timm
      path_to_docs: pytorch-image-models/hfdocs/source
      version_tag_suffix: ""
    secrets:
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}


================================================
FILE: .github/workflows/build_pr_documentation.yml
================================================
name: Build PR Documentation

on:
  pull_request:

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
    with:
      commit_sha: ${{ github.event.pull_request.head.sha }}
      pr_number: ${{ github.event.number }}
      package: pytorch-image-models
      package_name: timm
      path_to_docs: pytorch-image-models/hfdocs/source
      version_tag_suffix: ""


================================================
FILE: .github/workflows/tests.yml
================================================
name: Python tests

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

env:
  OMP_NUM_THREADS: 2
  MKL_NUM_THREADS: 2

jobs:
  test:
    name: Run tests on ${{ matrix.os }} with Python ${{ matrix.python }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        python: ['3.10', '3.13']
        torch: [{base: '1.13.0', vision: '0.14.0'}, {base: '2.9.1', vision: '0.24.1'}]
        testmarker: ['-k "not test_models"', '-m base', '-m cfg', '-m torchscript', '-m features', '-m fxforward', '-m fxbackward']
        exclude:
          - python: '3.13'
            torch: {base: '1.13.0', vision: '0.14.0'}
    runs-on: ${{ matrix.os }}

    steps:
    - uses: actions/checkout@v6
    - name: Set up Python ${{ matrix.python }}
      uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python }}
    - name: Install testing dependencies
      run: |
        python -m pip install --upgrade pip
        pip install -r requirements-dev.txt
    - name: Install torch on mac
      if: startsWith(matrix.os, 'macOS')
      run: pip install --no-cache-dir torch==${{ matrix.torch.base }} torchvision==${{ matrix.torch.vision }}
    - name: Install torch on Windows
      if: startsWith(matrix.os, 'windows')
      run: pip install --no-cache-dir torch==${{ matrix.torch.base }} torchvision==${{ matrix.torch.vision }}
    - name: Install torch on ubuntu
      if: startsWith(matrix.os, 'ubuntu')
      run: |
        sudo sed -i 's/azure\.//' /etc/apt/sources.list
        sudo apt update
        sudo apt install -y google-perftools
        pip install --no-cache-dir torch==${{ matrix.torch.base }}+cpu torchvision==${{ matrix.torch.vision }}+cpu --index-url https://download.pytorch.org/whl/cpu
    - name: Install requirements
      run: |
        pip install -r requirements.txt
    - name: Force old numpy for old torch
      if: ${{ matrix.torch.base == '1.13.0' }}
      run: pip install --upgrade 'numpy<2.0'
    - name: Run tests on Windows
      if: startsWith(matrix.os, 'windows')
      env:
        PYTHONDONTWRITEBYTECODE: 1
      run: |
        pytest -vv tests
    - name: Run '${{ matrix.testmarker }}' tests on Linux / Mac
      if: ${{ !startsWith(matrix.os, 'windows') }}
      env:
        LD_PRELOAD: /usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
        PYTHONDONTWRITEBYTECODE: 1
      run: |
        pytest -vv --forked --durations=0 ${{ matrix.testmarker }} tests


================================================
FILE: .github/workflows/trufflehog.yml
================================================
on:
  push:

name: Secret Leaks

jobs:
  trufflehog:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout code
      uses: actions/checkout@v6
      with:
        fetch-depth: 0
    - name: Secret Scanning
      uses: trufflesecurity/trufflehog@main


================================================
FILE: .github/workflows/upload_pr_documentation.yml
================================================
name: Upload PR Documentation

on:
  workflow_run:
    workflows: ["Build PR Documentation"]
    types:
      - completed

jobs:
  build:
    uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
    with:
      package_name: timm
    secrets:
      hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
      comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# PyCharm
.idea

output/

# PyTorch weights
*.tar
*.pth
*.pt
*.torch
*.gz
Untitled.ipynb
Testing notebook.ipynb

# Root dir exclusions
/*.csv
/*.yaml
/*.json
/*.jpg
/*.png
/*.zip
/*.tar.*

================================================
FILE: CITATION.cff
================================================
message: "If you use this software, please cite it as below."
title: "PyTorch Image Models"
doi: "10.5281/zenodo.4414861"
authors:
  - family-names: Wightman
    given-names: Ross
version: 1.0.11
year: "2019"
url: "https://github.com/huggingface/pytorch-image-models"
license: "Apache 2.0"

================================================
FILE: CLAUDE.md
================================================
# CLAUDE.md - PyTorch Image Models (timm)

## Build/Test Commands
- Install: `python -m pip install -e .`
- Run tests: `pytest tests/`
- Run specific test: `pytest tests/test_models.py::test_specific_function -v`
- Run tests in parallel: `pytest -n 4 tests/`
- Filter tests: `pytest -k "substring-to-match" tests/`

## Code Style Guidelines
- Line length: 120 chars
- Indentation: 4-space hanging indents, arguments should have an extra level of indent, use 'sadface' (closing parenthesis and colon on a separate line)
- Typing: Use PEP484 type annotations in function signatures
- Docstrings: Google style (do not duplicate type annotations and defaults)
- Imports: Standard library first, then third-party, then local
- Function naming: snake_case
- Class naming: PascalCase
- Error handling: Use try/except with specific exceptions
- Conditional expressions: Use parentheses for complex expressions

================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
  community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of
  any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
  without their explicit permission
* Other conduct that could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
feedback@huggingface.co.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period. This
includes avoiding interactions in community spaces and external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any public interaction within the
community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations


================================================
FILE: CONTRIBUTING.md
================================================
*This guideline is very much a work-in-progress.*

Contributions to `timm` for code, documentation, tests are more than welcome!

There haven't been any formal guidelines to date so please bear with me, and feel free to add to this guide.

# Coding style

Code linting and auto-format (black) are not currently in place but open to consideration. In the meantime, the style to follow is (mostly) aligned with Google's guide: https://google.github.io/styleguide/pyguide.html. 

A few specific differences from Google style (or black)
1. Line length is 120 char. Going over is okay in some cases (e.g. I prefer not to break URL across lines).
2. Hanging indents are always preferred, please avoid aligning arguments with closing brackets or braces.

Example, from Google guide, but this is a NO here:
```python
   # Aligned with opening delimiter.
   foo = long_function_name(var_one, var_two,
                            var_three, var_four)
   meal = (spam,
           beans)

   # Aligned with opening delimiter in a dictionary.
   foo = {
       'long_dictionary_key': value1 +
                              value2,
       ...
   }
```
This is YES:

```python
   # 4-space hanging indent; nothing on first line,
   # closing parenthesis on a new line.
   foo = long_function_name(
       var_one, var_two, var_three,
       var_four
   )
   meal = (
       spam,
       beans,
   )

   # 4-space hanging indent in a dictionary.
   foo = {
       'long_dictionary_key':
           long_dictionary_value,
       ...
   }
```

While preferred `timm` style is *mostly* compatible with Black / Ruff. Since I've been following PEP 8 style since before Black was a thing, there's one area I can't agree on, function arg indents. From a Black example this:
```python
def very_important_function(
    template: str,
    *variables,
    file: os.PathLike,
    engine: str,
    header: bool = True,
    debug: bool = False,
):
    with open(file, "w") as f:
        ...
```

Should according to PEP 8 (https://peps.python.org/pep-0008/#indentation) have an extra level of indent on the args:

```python
def very_important_function(
        template: str,
        *variables,
        file: os.PathLike,
        engine: str,
        header: bool = True,
        debug: bool = False,
):
    with open(file, "w") as f:
        ...
```

I do like sadface though. So please don't run Black on existing files and convert all of the arg indents. Thanks!

When there is discrepancy in a given source file (there are many origins for various bits of code and not all have been updated to what I consider current goal), please follow the style in a given file.

Please avoid formatting code that is unrelated to your PR.

PR with pure formatting / style fixes will be accepted but only in isolation from functional changes, best to ask before starting such a change.

# Documentation

As with code style, docstring style is based on the Google guide: https://google.github.io/styleguide/pyguide.html

The goal for the code is to eventually move to have all major functions and `__init__` methods use PEP484 type annotations.

When type annotations are used for a function, as per the Google pyguide, they should **NOT** be duplicated in the docstrings, please leave annotations as the one source of truth re typing.

There are a LOT of gaps in current documentation relative to the functionality in timm, please, document away!

# Installation

Create a Python virtual environment using Python 3.10. Inside the environment, install `torch` and `torchvision` using the instructions matching your system as listed on the [PyTorch website](https://pytorch.org/).

Then install the remaining dependencies:

```
python -m pip install -r requirements.txt
python -m pip install -r requirements-dev.txt  # for testing
python -m pip install -e .
```

## Unit tests

Run the tests using:

```
pytest tests/
```

Since the whole test suite takes a lot of time to run locally (a few hours), you may want to select a subset of tests relating to the changes you made by using the `-k` option of [`pytest`](https://docs.pytest.org/en/7.1.x/example/markers.html#using-k-expr-to-select-tests-based-on-their-name). Moreover, running tests in parallel (in this example 4 processes) with the `-n` option may help:

```
pytest -k "substring-to-match" -n 4 tests/
```

## Building documentation

Please refer to [this document](https://github.com/huggingface/pytorch-image-models/tree/main/hfdocs).

# Questions

If you have any questions about contribution, where / how to contribute, please ask in the [Discussions](https://github.com/huggingface/pytorch-image-models/discussions/categories/contributing) (there is a `Contributing` topic).


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2019 Ross Wightman

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: MANIFEST.in
================================================
include timm/models/_pruned/*.txt
include timm/data/_info/*.txt
include timm/data/_info/*.json


================================================
FILE: README.md
================================================
# PyTorch Image Models
- [What's New](#whats-new)
- [Introduction](#introduction)
- [Models](#models)
- [Features](#features)
- [Results](#results)
- [Getting Started (Documentation)](#getting-started-documentation)
- [Train, Validation, Inference Scripts](#train-validation-inference-scripts)
- [Awesome PyTorch Resources](#awesome-pytorch-resources)
- [Licenses](#licenses)
- [Citing](#citing)

## What's New

## Feb 23, 2026
* Add token distillation training support to distillation task wrappers
* Remove some torch.jit usage in prep for official deprecation
* Caution added to AdamP optimizer
* Call reset_parameters() even if meta-device init so that buffers get init w/ hacks like init_empty_weights
* Tweak Muon optimizer to work with DTensor/FSDP2 (clamp_ instead of clamp_min_, alternate NS branch for DTensor)
* Release 1.0.25

## Jan 21, 2026
* **Compat Break**: Fix oversight w/ QKV vs MLP bias in `ParallelScalingBlock` (& `DiffParallelScalingBlock`)
  * Does not impact any trained `timm` models but could impact downstream use.

## Jan 5 & 6, 2026
* Release 1.0.24
* Add new benchmark result csv files for inference timing on all models w/ RTX Pro 6000, 5090, and 4090 cards w/ PyTorch 2.9.1
* Fix moved module error in deprecated timm.models.layers import path that impacts legacy imports
* Release 1.0.23

## Dec 30, 2025
* Add better NAdaMuon trained `dpwee`, `dwee`, `dlittle` (differential) ViTs with a small boost over previous runs
  * https://huggingface.co/timm/vit_dlittle_patch16_reg1_gap_256.sbb_nadamuon_in1k (83.24% top-1)
  * https://huggingface.co/timm/vit_dwee_patch16_reg1_gap_256.sbb_nadamuon_in1k  (81.80% top-1)
  * https://huggingface.co/timm/vit_dpwee_patch16_reg1_gap_256.sbb_nadamuon_in1k (81.67% top-1)
* Add a ~21M param `timm` variant of the CSATv2 model at 512x512 & 640x640
  * https://huggingface.co/timm/csatv2_21m.sw_r640_in1k (83.13% top-1)
  * https://huggingface.co/timm/csatv2_21m.sw_r512_in1k (82.58% top-1)
* Factor non-persistent param init out of `__init__` into a common method that can be externally called via `init_non_persistent_buffers()` after meta-device init. 
  
## Dec 12, 2025
* Add CSATV2 model (thanks https://github.com/gusdlf93) -- a lightweight but high res model with DCT stem & spatial attention. https://huggingface.co/Hyunil/CSATv2
* Add AdaMuon and NAdaMuon optimizer support to existing `timm` Muon impl. Appears more competitive vs AdamW with familiar hparams for image tasks.
* End of year PR cleanup, merge aspects of several long open PR
  * Merge differential attention (`DiffAttention`), add corresponding `DiffParallelScalingBlock` (for ViT), train some wee vits
    * https://huggingface.co/timm/vit_dwee_patch16_reg1_gap_256.sbb_in1k
    * https://huggingface.co/timm/vit_dpwee_patch16_reg1_gap_256.sbb_in1k
  * Add a few pooling modules, `LsePlus` and `SimPool`
  * Cleanup, optimize `DropBlock2d` (also add support to ByobNet based models)
* Bump unit tests to PyTorch 2.9.1 + Python 3.13 on upper end, lower still PyTorch 1.13 + Python 3.10
  
## Dec 1, 2025
* Add lightweight task abstraction, add logits and feature distillation support to train script via new tasks.
* Remove old APEX AMP support

## Nov 4, 2025
* Fix LayerScale / LayerScale2d init bug (init values ignored), introduced in 1.0.21. Thanks https://github.com/Ilya-Fradlin
* Release 1.0.22

## Oct 31, 2025 🎃
* Update imagenet & OOD variant result csv files to include a few new models and verify correctness over several torch & timm versions
* EfficientNet-X and EfficientNet-H B5 model weights added as part of a hparam search for AdamW vs Muon (still iterating on Muon runs)

## Oct 16-20, 2025
* Add an impl of the Muon optimizer (based on https://github.com/KellerJordan/Muon) with customizations
  * extra flexibility and improved handling for conv weights and fallbacks for weight shapes not suited for orthogonalization
  * small speedup for NS iterations by reducing allocs and using fused (b)add(b)mm ops
  * by default uses AdamW (or NAdamW if `nesterov=True`) updates if muon not suitable for parameter shape (or excluded via param group flag)
  * like torch impl, select from several LR scale adjustment fns via `adjust_lr_fn`
  * select from several NS coefficient presets or specify your own via `ns_coefficients`
* First 2 steps of 'meta' device model initialization supported
  * Fix several ops that were breaking creation under 'meta' device context
  * Add device & dtype factory kwarg support to all models and modules (anything inheriting from nn.Module) in `timm`
* License fields added to pretrained cfgs in code
* Release 1.0.21

## Sept 21, 2025
* Remap DINOv3 ViT weight tags from `lvd_1689m` -> `lvd1689m` to match (same for `sat_493m` -> `sat493m`)
* Release 1.0.20

## Sept 17, 2025
* DINOv3 (https://arxiv.org/abs/2508.10104) ConvNeXt and ViT models added. ConvNeXt models were mapped to existing `timm` model. ViT support done via the EVA base model w/ a new `RotaryEmbeddingDinoV3` to match the DINOv3 specific RoPE impl
  * HuggingFace Hub: https://huggingface.co/collections/timm/timm-dinov3-68cb08bb0bee365973d52a4d
* MobileCLIP-2 (https://arxiv.org/abs/2508.20691) vision encoders. New MCI3/MCI4 FastViT variants added and weights mapped to existing FastViT and B, L/14 ViTs.
* MetaCLIP-2 Worldwide (https://arxiv.org/abs/2507.22062) ViT encoder weights added.
* SigLIP-2 (https://arxiv.org/abs/2502.14786) NaFlex ViT encoder weights added via timm NaFlexViT model.
* Misc fixes and contributions

## July 23, 2025
* Add `set_input_size()` method to EVA models, used by OpenCLIP 3.0.0 to allow resizing for timm based encoder models.
* Release 1.0.18, needed for PE-Core S & T models in OpenCLIP 3.0.0
* Fix small typing issue that broke Python 3.9 compat. 1.0.19 patch release.

## July 21, 2025
* ROPE support added to NaFlexViT. All models covered by the EVA base (`eva.py`) including EVA, EVA02, Meta PE ViT, `timm` SBB ViT w/ ROPE, and Naver ROPE-ViT can now be loaded in NaFlexViT when `use_naflex=True` passed at model creation time
* More Meta PE ViT encoders added, including small/tiny variants, lang variants w/ tiling, and more spatial variants.
* PatchDropout fixed with NaFlexViT and also w/ EVA models (regression after adding Naver ROPE-ViT)
* Fix XY order with grid_indexing='xy', impacted non-square image use in 'xy' mode (only ROPE-ViT and PE impacted).

## July 7, 2025
* MobileNet-v5 backbone tweaks for improved Google Gemma 3n behaviour (to pair with updated official weights)
  * Add stem bias (zero'd in updated weights, compat break with old weights)
  * GELU -> GELU (tanh approx). A minor change to be closer to JAX
* Add two arguments to layer-decay support, a min scale clamp and 'no optimization' scale threshold
* Add 'Fp32' LayerNorm, RMSNorm, SimpleNorm variants that can be enabled to force computation of norm in float32
* Some typing, argument cleanup for norm, norm+act layers done with above
* Support Naver ROPE-ViT (https://github.com/naver-ai/rope-vit) in `eva.py`, add RotaryEmbeddingMixed module for mixed mode, weights on HuggingFace Hub

|model                                             |img_size|top1  |top5  |param_count|
|--------------------------------------------------|--------|------|------|-----------|
|vit_large_patch16_rope_mixed_ape_224.naver_in1k  |224     |84.84 |97.122|304.4      |
|vit_large_patch16_rope_mixed_224.naver_in1k      |224     |84.828|97.116|304.2      |
|vit_large_patch16_rope_ape_224.naver_in1k        |224     |84.65 |97.154|304.37     |
|vit_large_patch16_rope_224.naver_in1k            |224     |84.648|97.122|304.17     |
|vit_base_patch16_rope_mixed_ape_224.naver_in1k   |224     |83.894|96.754|86.59      |
|vit_base_patch16_rope_mixed_224.naver_in1k       |224     |83.804|96.712|86.44      |
|vit_base_patch16_rope_ape_224.naver_in1k         |224     |83.782|96.61 |86.59      |
|vit_base_patch16_rope_224.naver_in1k             |224     |83.718|96.672|86.43      |
|vit_small_patch16_rope_224.naver_in1k            |224     |81.23 |95.022|21.98      |
|vit_small_patch16_rope_mixed_224.naver_in1k      |224     |81.216|95.022|21.99      |
|vit_small_patch16_rope_ape_224.naver_in1k        |224     |81.004|95.016|22.06      |
|vit_small_patch16_rope_mixed_ape_224.naver_in1k  |224     |80.986|94.976|22.06      |
* Some cleanup of ROPE modules, helpers, and FX tracing leaf registration
* Preparing version 1.0.17 release

## June 26, 2025
* MobileNetV5 backbone (w/ encoder only variant) for [Gemma 3n](https://ai.google.dev/gemma/docs/gemma-3n#parameters) image encoder
* Version 1.0.16 released

## June 23, 2025
* Add F.grid_sample based 2D and factorized pos embed resize to NaFlexViT. Faster when lots of different sizes (based on example by https://github.com/stas-sl).
* Further speed up patch embed resample by replacing vmap with matmul (based on snippet by https://github.com/stas-sl).
* Add 3 initial native aspect NaFlexViT checkpoints created while testing, ImageNet-1k and 3 different pos embed configs w/ same hparams.

 | Model | Top-1 Acc | Top-5 Acc | Params (M) | Eval Seq Len |
 |:---|:---:|:---:|:---:|:---:|
 | [naflexvit_base_patch16_par_gap.e300_s576_in1k](https://hf.co/timm/naflexvit_base_patch16_par_gap.e300_s576_in1k) | 83.67 | 96.45 | 86.63 | 576 |
 | [naflexvit_base_patch16_parfac_gap.e300_s576_in1k](https://hf.co/timm/naflexvit_base_patch16_parfac_gap.e300_s576_in1k) | 83.63 | 96.41 | 86.46 | 576 |
 | [naflexvit_base_patch16_gap.e300_s576_in1k](https://hf.co/timm/naflexvit_base_patch16_gap.e300_s576_in1k) | 83.50 | 96.46 | 86.63 | 576 |
* Support gradient checkpointing for `forward_intermediates` and fix some checkpointing bugs. Thanks https://github.com/brianhou0208
* Add 'corrected weight decay' (https://arxiv.org/abs/2506.02285) as option to AdamW (legacy), Adopt, Kron, Adafactor (BV), Lamb, LaProp, Lion, NadamW, RmsPropTF, SGDW optimizers
* Switch PE (perception encoder) ViT models to use native timm weights instead of remapping on the fly
* Fix cuda stream bug in prefetch loader
  
## June 5, 2025
* Initial NaFlexVit model code. NaFlexVit is a Vision Transformer with:
  1. Encapsulated embedding and position encoding in a single module
  2. Support for nn.Linear patch embedding on pre-patchified (dictionary) inputs
  3. Support for NaFlex variable aspect, variable resolution (SigLip-2: https://arxiv.org/abs/2502.14786)
  4. Support for FlexiViT variable patch size (https://arxiv.org/abs/2212.08013)
  5. Support for NaViT fractional/factorized position embedding (https://arxiv.org/abs/2307.06304)
* Existing vit models in `vision_transformer.py` can be loaded into the NaFlexVit model by adding the `use_naflex=True` flag to `create_model`
  * Some native weights coming soon
* A full NaFlex data pipeline is available that allows training / fine-tuning / evaluating with variable aspect / size images
  * To enable in `train.py` and `validate.py` add the `--naflex-loader` arg, must be used with a NaFlexVit
* To evaluate an existing (classic) ViT loaded in NaFlexVit model w/ NaFlex data pipe:
  * `python validate.py /imagenet --amp -j 8 --model vit_base_patch16_224 --model-kwargs use_naflex=True --naflex-loader --naflex-max-seq-len 256` 
* The training has some extra args features worth noting
  * The `--naflex-train-seq-lens` argument specifies which sequence lengths to randomly pick from per batch during training
  * The `--naflex-max-seq-len` argument sets the target sequence length for validation
  * Adding `--model-kwargs enable_patch_interpolator=True --naflex-patch-sizes 12 16 24` will enable random patch size selection per-batch w/ interpolation
  * The `--naflex-loss-scale` arg changes loss scaling mode per batch relative to the batch size, `timm` NaFlex loading changes the batch size for each seq len

## May 28, 2025
* Add a number of small/fast models thanks to https://github.com/brianhou0208
  * SwiftFormer - [(ICCV2023) SwiftFormer: Efficient Additive Attention for Transformer-based Real-time Mobile Vision Applications](https://github.com/Amshaker/SwiftFormer) 
  * FasterNet - [(CVPR2023) Run, Don’t Walk: Chasing Higher FLOPS for Faster Neural Networks](https://github.com/JierunChen/FasterNet)
  * SHViT - [(CVPR2024) SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design](https://github.com/ysj9909/SHViT)
  * StarNet - [(CVPR2024) Rewrite the Stars](https://github.com/ma-xu/Rewrite-the-Stars)
  * GhostNet-V3 [GhostNetV3: Exploring the Training Strategies for Compact Models](https://github.com/huawei-noah/Efficient-AI-Backbones/tree/master/ghostnetv3_pytorch)
* Update EVA ViT (closest match) to support Perception Encoder models (https://arxiv.org/abs/2504.13181) from Meta, loading Hub weights but I still need to push dedicated `timm` weights
  * Add some flexibility to ROPE impl
* Big increase in number of models supporting `forward_intermediates()` and some additional fixes thanks to https://github.com/brianhou0208
  * DaViT, EdgeNeXt, EfficientFormerV2, EfficientViT(MIT), EfficientViT(MSRA), FocalNet, GCViT, HGNet /V2, InceptionNeXt, Inception-V4, MambaOut, MetaFormer, NesT, Next-ViT, PiT, PVT V2, RepGhostNet, RepViT, ResNetV2, ReXNet, TinyViT, TResNet, VoV
* TNT model updated w/ new weights `forward_intermediates()` thanks to https://github.com/brianhou0208
* Add `local-dir:` pretrained schema, can use `local-dir:/path/to/model/folder` as the model name to source the model / pretrained cfg & weights in Hugging Face Hub format (config.json + weights file) from a local folder.
* Fixes, improvements for onnx export
    
## Feb 21, 2025
* SigLIP 2 ViT image encoders added (https://huggingface.co/collections/timm/siglip-2-67b8e72ba08b09dd97aecaf9)
  * Variable resolution / aspect NaFlex versions are a WIP
* Add 'SO150M2' ViT weights trained with SBB recipes, great results, better for ImageNet than previous attempt w/ less training.
  * `vit_so150m2_patch16_reg1_gap_448.sbb_e200_in12k_ft_in1k` - 88.1% top-1
  * `vit_so150m2_patch16_reg1_gap_384.sbb_e200_in12k_ft_in1k` - 87.9% top-1
  * `vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1k` - 87.3% top-1
  * `vit_so150m2_patch16_reg4_gap_256.sbb_e200_in12k`
* Updated InternViT-300M '2.5' weights
* Release 1.0.15

## Feb 1, 2025
* FYI PyTorch 2.6 & Python 3.13 are tested and working w/ current main and released version of `timm`

## Jan 27, 2025
* Add Kron Optimizer (PSGD w/ Kronecker-factored preconditioner) 
  * Code from https://github.com/evanatyourservice/kron_torch
  * See also https://sites.google.com/site/lixilinx/home/psgd

## Jan 19, 2025
* Fix loading of LeViT safetensor weights, remove conversion code which should have been deactivated
* Add 'SO150M' ViT weights trained with SBB recipes, decent results, but not optimal shape for ImageNet-12k/1k pretrain/ft
  * `vit_so150m_patch16_reg4_gap_256.sbb_e250_in12k_ft_in1k` - 86.7% top-1
  * `vit_so150m_patch16_reg4_gap_384.sbb_e250_in12k_ft_in1k` - 87.4% top-1
  * `vit_so150m_patch16_reg4_gap_256.sbb_e250_in12k`
* Misc typing, typo, etc. cleanup
* 1.0.14 release to get above LeViT fix out

## Jan 9, 2025
* Add support to train and validate in pure `bfloat16` or `float16`
* `wandb` project name arg added by https://github.com/caojiaolong, use arg.experiment for name
* Fix old issue w/ checkpoint saving not working on filesystem w/o hard-link support (e.g. FUSE fs mounts)
* 1.0.13 release

## Jan 6, 2025
* Add `torch.utils.checkpoint.checkpoint()` wrapper in `timm.models` that defaults `use_reentrant=False`, unless `TIMM_REENTRANT_CKPT=1` is set in env.

## Dec 31, 2024
* `convnext_nano` 384x384 ImageNet-12k pretrain & fine-tune. https://huggingface.co/models?search=convnext_nano%20r384
* Add AIM-v2 encoders from https://github.com/apple/ml-aim, see on Hub: https://huggingface.co/models?search=timm%20aimv2
* Add PaliGemma2 encoders from https://github.com/google-research/big_vision to existing PaliGemma, see on Hub: https://huggingface.co/models?search=timm%20pali2
* Add missing L/14 DFN2B 39B CLIP ViT, `vit_large_patch14_clip_224.dfn2b_s39b`
* Fix existing `RmsNorm` layer & fn to match standard formulation, use PT 2.5 impl when possible. Move old impl to `SimpleNorm` layer, it's LN w/o centering or bias. There were only two `timm` models using it, and they have been updated.
* Allow override of `cache_dir` arg for model creation
* Pass through `trust_remote_code` for HF datasets wrapper
* `inception_next_atto` model added by creator
* Adan optimizer caution, and Lamb decoupled weight decay options
* Some feature_info metadata fixed by https://github.com/brianhou0208
* All OpenCLIP and JAX (CLIP, SigLIP, Pali, etc) model weights that used load time remapping were given their own HF Hub instances so that they work with `hf-hub:` based loading, and thus will work with new Transformers `TimmWrapperModel`

## Introduction

Py**T**orch **Im**age **M**odels (`timm`) is a collection of image models, layers, utilities, optimizers, schedulers, data-loaders / augmentations, and reference training / validation scripts that aim to pull together a wide variety of SOTA models with ability to reproduce ImageNet training results.

The work of many others is present here. I've tried to make sure all source material is acknowledged via links to github, arxiv papers, etc in the README, documentation, and code docstrings. Please let me know if I missed anything.

## Features

### Models

All model architecture families include variants with pretrained weights. There are specific model variants without any weights, it is NOT a bug. Help training new or better weights is always appreciated.

* Aggregating Nested Transformers - https://arxiv.org/abs/2105.12723
* BEiT - https://arxiv.org/abs/2106.08254
* BEiT-V2 - https://arxiv.org/abs/2208.06366
* BEiT3 - https://arxiv.org/abs/2208.10442
* Big Transfer ResNetV2 (BiT) - https://arxiv.org/abs/1912.11370
* Bottleneck Transformers - https://arxiv.org/abs/2101.11605
* CaiT (Class-Attention in Image Transformers) - https://arxiv.org/abs/2103.17239
* CoaT (Co-Scale Conv-Attentional Image Transformers) - https://arxiv.org/abs/2104.06399
* CoAtNet (Convolution and Attention) - https://arxiv.org/abs/2106.04803
* ConvNeXt - https://arxiv.org/abs/2201.03545
* ConvNeXt-V2 - http://arxiv.org/abs/2301.00808
* ConViT (Soft Convolutional Inductive Biases Vision Transformers) - https://arxiv.org/abs/2103.10697
* CspNet (Cross-Stage Partial Networks) - https://arxiv.org/abs/1911.11929
* DeiT - https://arxiv.org/abs/2012.12877
* DeiT-III - https://arxiv.org/pdf/2204.07118.pdf
* DenseNet - https://arxiv.org/abs/1608.06993
* DLA - https://arxiv.org/abs/1707.06484
* DPN (Dual-Path Network) - https://arxiv.org/abs/1707.01629
* EdgeNeXt - https://arxiv.org/abs/2206.10589
* EfficientFormer - https://arxiv.org/abs/2206.01191
* EfficientFormer-V2 - https://arxiv.org/abs/2212.08059
* EfficientNet (MBConvNet Family)
    * EfficientNet NoisyStudent (B0-B7, L2) - https://arxiv.org/abs/1911.04252
    * EfficientNet AdvProp (B0-B8) - https://arxiv.org/abs/1911.09665
    * EfficientNet (B0-B7) - https://arxiv.org/abs/1905.11946
    * EfficientNet-EdgeTPU (S, M, L) - https://ai.googleblog.com/2019/08/efficientnet-edgetpu-creating.html
    * EfficientNet V2 - https://arxiv.org/abs/2104.00298
    * FBNet-C - https://arxiv.org/abs/1812.03443
    * MixNet - https://arxiv.org/abs/1907.09595
    * MNASNet B1, A1 (Squeeze-Excite), and Small - https://arxiv.org/abs/1807.11626
    * MobileNet-V2 - https://arxiv.org/abs/1801.04381
    * Single-Path NAS - https://arxiv.org/abs/1904.02877
    * TinyNet - https://arxiv.org/abs/2010.14819
* EfficientViT (MIT) - https://arxiv.org/abs/2205.14756
* EfficientViT (MSRA) - https://arxiv.org/abs/2305.07027
* EVA - https://arxiv.org/abs/2211.07636
* EVA-02 - https://arxiv.org/abs/2303.11331
* FasterNet - https://arxiv.org/abs/2303.03667
* FastViT - https://arxiv.org/abs/2303.14189
* FlexiViT - https://arxiv.org/abs/2212.08013
* FocalNet (Focal Modulation Networks) - https://arxiv.org/abs/2203.11926
* GCViT (Global Context Vision Transformer) - https://arxiv.org/abs/2206.09959
* GhostNet - https://arxiv.org/abs/1911.11907
* GhostNet-V2 - https://arxiv.org/abs/2211.12905
* GhostNet-V3 - https://arxiv.org/abs/2404.11202
* gMLP - https://arxiv.org/abs/2105.08050
* GPU-Efficient Networks - https://arxiv.org/abs/2006.14090
* Halo Nets - https://arxiv.org/abs/2103.12731
* HGNet / HGNet-V2 - TBD
* HRNet - https://arxiv.org/abs/1908.07919
* InceptionNeXt - https://arxiv.org/abs/2303.16900
* Inception-V3 - https://arxiv.org/abs/1512.00567
* Inception-ResNet-V2 and Inception-V4 - https://arxiv.org/abs/1602.07261
* Lambda Networks - https://arxiv.org/abs/2102.08602
* LeViT (Vision Transformer in ConvNet's Clothing) - https://arxiv.org/abs/2104.01136
* MambaOut - https://arxiv.org/abs/2405.07992
* MaxViT (Multi-Axis Vision Transformer) - https://arxiv.org/abs/2204.01697
* MetaFormer (PoolFormer-v2, ConvFormer, CAFormer) - https://arxiv.org/abs/2210.13452
* MLP-Mixer - https://arxiv.org/abs/2105.01601
* MobileCLIP - https://arxiv.org/abs/2311.17049
* MobileNet-V3 (MBConvNet w/ Efficient Head) - https://arxiv.org/abs/1905.02244
  * FBNet-V3 - https://arxiv.org/abs/2006.02049
  * HardCoRe-NAS - https://arxiv.org/abs/2102.11646
  * LCNet - https://arxiv.org/abs/2109.15099
* MobileNetV4 - https://arxiv.org/abs/2404.10518
* MobileOne - https://arxiv.org/abs/2206.04040
* MobileViT - https://arxiv.org/abs/2110.02178
* MobileViT-V2 - https://arxiv.org/abs/2206.02680
* MViT-V2 (Improved Multiscale Vision Transformer) - https://arxiv.org/abs/2112.01526
* NASNet-A - https://arxiv.org/abs/1707.07012
* NesT - https://arxiv.org/abs/2105.12723
* Next-ViT - https://arxiv.org/abs/2207.05501
* NFNet-F - https://arxiv.org/abs/2102.06171
* NF-RegNet / NF-ResNet - https://arxiv.org/abs/2101.08692
* PE (Perception Encoder) - https://arxiv.org/abs/2504.13181
* PNasNet - https://arxiv.org/abs/1712.00559
* PoolFormer (MetaFormer) - https://arxiv.org/abs/2111.11418
* Pooling-based Vision Transformer (PiT) - https://arxiv.org/abs/2103.16302
* PVT-V2 (Improved Pyramid Vision Transformer) - https://arxiv.org/abs/2106.13797
* RDNet (DenseNets Reloaded) - https://arxiv.org/abs/2403.19588
* RegNet - https://arxiv.org/abs/2003.13678
* RegNetZ - https://arxiv.org/abs/2103.06877
* RepVGG - https://arxiv.org/abs/2101.03697
* RepGhostNet - https://arxiv.org/abs/2211.06088
* RepViT - https://arxiv.org/abs/2307.09283
* ResMLP - https://arxiv.org/abs/2105.03404
* ResNet/ResNeXt
    * ResNet (v1b/v1.5) - https://arxiv.org/abs/1512.03385
    * ResNeXt - https://arxiv.org/abs/1611.05431
    * 'Bag of Tricks' / Gluon C, D, E, S variations - https://arxiv.org/abs/1812.01187
    * Weakly-supervised (WSL) Instagram pretrained / ImageNet tuned ResNeXt101 - https://arxiv.org/abs/1805.00932
    * Semi-supervised (SSL) / Semi-weakly Supervised (SWSL) ResNet/ResNeXts - https://arxiv.org/abs/1905.00546
    * ECA-Net (ECAResNet) - https://arxiv.org/abs/1910.03151v4
    * Squeeze-and-Excitation Networks (SEResNet) - https://arxiv.org/abs/1709.01507
    * ResNet-RS - https://arxiv.org/abs/2103.07579
* Res2Net - https://arxiv.org/abs/1904.01169
* ResNeSt - https://arxiv.org/abs/2004.08955
* ReXNet - https://arxiv.org/abs/2007.00992
* ROPE-ViT - https://arxiv.org/abs/2403.13298
* SelecSLS - https://arxiv.org/abs/1907.00837
* Selective Kernel Networks - https://arxiv.org/abs/1903.06586
* Sequencer2D - https://arxiv.org/abs/2205.01972
* SHViT - https://arxiv.org/abs/2401.16456
* SigLIP (image encoder) - https://arxiv.org/abs/2303.15343
* SigLIP 2 (image encoder) - https://arxiv.org/abs/2502.14786
* StarNet - https://arxiv.org/abs/2403.19967
* SwiftFormer - https://arxiv.org/pdf/2303.15446
* Swin S3 (AutoFormerV2) - https://arxiv.org/abs/2111.14725
* Swin Transformer - https://arxiv.org/abs/2103.14030
* Swin Transformer V2 - https://arxiv.org/abs/2111.09883
* TinyViT - https://arxiv.org/abs/2207.10666
* Transformer-iN-Transformer (TNT) - https://arxiv.org/abs/2103.00112
* TResNet - https://arxiv.org/abs/2003.13630
* Twins (Spatial Attention in Vision Transformers) - https://arxiv.org/pdf/2104.13840.pdf
* VGG - https://arxiv.org/abs/1409.1556
* Visformer - https://arxiv.org/abs/2104.12533
* Vision Transformer - https://arxiv.org/abs/2010.11929
* ViTamin - https://arxiv.org/abs/2404.02132
* VOLO (Vision Outlooker) - https://arxiv.org/abs/2106.13112
* VovNet V2 and V1 - https://arxiv.org/abs/1911.06667
* Xception - https://arxiv.org/abs/1610.02357
* Xception (Modified Aligned, Gluon) - https://arxiv.org/abs/1802.02611
* Xception (Modified Aligned, TF) - https://arxiv.org/abs/1802.02611
* XCiT (Cross-Covariance Image Transformers) - https://arxiv.org/abs/2106.09681

### Optimizers
To see full list of optimizers w/ descriptions: `timm.optim.list_optimizers(with_description=True)`

Included optimizers available via `timm.optim.create_optimizer_v2` factory method:
* `adabelief` an implementation of AdaBelief adapted from https://github.com/juntang-zhuang/Adabelief-Optimizer - https://arxiv.org/abs/2010.07468
* `adafactor` adapted from [FAIRSeq impl](https://github.com/pytorch/fairseq/blob/master/fairseq/optim/adafactor.py) - https://arxiv.org/abs/1804.04235
* `adafactorbv` adapted from [Big Vision](https://github.com/google-research/big_vision/blob/main/big_vision/optax.py) - https://arxiv.org/abs/2106.04560
* `adahessian` by [David Samuel](https://github.com/davda54/ada-hessian) - https://arxiv.org/abs/2006.00719
* `adamp` and `sgdp` by [Naver ClovAI](https://github.com/clovaai) - https://arxiv.org/abs/2006.08217
* `adamuon` and `nadamuon` as per https://github.com/Chongjie-Si/AdaMuon - https://arxiv.org/abs/2507.11005
* `adan` an implementation of Adan adapted from https://github.com/sail-sg/Adan - https://arxiv.org/abs/2208.06677
* `adopt` ADOPT adapted from https://github.com/iShohei220/adopt - https://arxiv.org/abs/2411.02853
* `kron` PSGD w/ Kronecker-factored preconditioner from https://github.com/evanatyourservice/kron_torch - https://sites.google.com/site/lixilinx/home/psgd
* `lamb` an implementation of Lamb and LambC (w/ trust-clipping) cleaned up and modified to support use with XLA - https://arxiv.org/abs/1904.00962
* `laprop` optimizer from https://github.com/Z-T-WANG/LaProp-Optimizer - https://arxiv.org/abs/2002.04839
* `lars` an implementation of LARS and LARC (w/ trust-clipping) - https://arxiv.org/abs/1708.03888
* `lion` an implementation of Lion adapted from https://github.com/google/automl/tree/master/lion - https://arxiv.org/abs/2302.06675
* `lookahead` adapted from impl by [Liam](https://github.com/alphadl/lookahead.pytorch) - https://arxiv.org/abs/1907.08610
* `madgrad` an implementation of MADGRAD adapted from https://github.com/facebookresearch/madgrad - https://arxiv.org/abs/2101.11075
* `mars` MARS optimizer from https://github.com/AGI-Arena/MARS - https://arxiv.org/abs/2411.10438
* `muon` MUON optimizer from https://github.com/KellerJordan/Muon with numerous additions and improved non-transformer behaviour
* `nadam` an implementation of Adam w/ Nesterov momentum
* `nadamw` an implementation of AdamW (Adam w/ decoupled weight-decay) w/ Nesterov momentum. A simplified impl based on https://github.com/mlcommons/algorithmic-efficiency
* `novograd` by [Masashi Kimura](https://github.com/convergence-lab/novograd) - https://arxiv.org/abs/1905.11286
* `radam` by [Liyuan Liu](https://github.com/LiyuanLucasLiu/RAdam) - https://arxiv.org/abs/1908.03265
* `rmsprop_tf` adapted from PyTorch RMSProp by myself. Reproduces much improved Tensorflow RMSProp behaviour
* `sgdw` an implementation of SGD w/ decoupled weight-decay
* `fused<name>` optimizers by name with [NVIDIA Apex](https://github.com/NVIDIA/apex/tree/master/apex/optimizers) installed
* `bnb<name>` optimizers by name with [BitsAndBytes](https://github.com/TimDettmers/bitsandbytes) installed
* `cadamw`, `clion`, and more 'Cautious' optimizers from https://github.com/kyleliang919/C-Optim - https://arxiv.org/abs/2411.16085
* `adam`, `adamw`, `rmsprop`, `adadelta`, `adagrad`, and `sgd` pass through to `torch.optim` implementations
* `c` suffix (eg `adamc`, `nadamc` to implement 'corrected weight decay' in https://arxiv.org/abs/2506.02285)
  
### Augmentations
* Random Erasing from [Zhun Zhong](https://github.com/zhunzhong07/Random-Erasing/blob/master/transforms.py) - https://arxiv.org/abs/1708.04896
* Mixup - https://arxiv.org/abs/1710.09412
* CutMix - https://arxiv.org/abs/1905.04899
* AutoAugment (https://arxiv.org/abs/1805.09501) and RandAugment (https://arxiv.org/abs/1909.13719) ImageNet configurations modeled after impl for EfficientNet training (https://github.com/tensorflow/tpu/blob/master/models/official/efficientnet/autoaugment.py)
* AugMix w/ JSD loss, JSD w/ clean + augmented mixing support works with AutoAugment and RandAugment as well - https://arxiv.org/abs/1912.02781
* SplitBatchNorm - allows splitting batch norm layers between clean and augmented (auxiliary batch norm) data

### Regularization
* DropPath aka "Stochastic Depth" - https://arxiv.org/abs/1603.09382
* DropBlock - https://arxiv.org/abs/1810.12890
* Blur Pooling - https://arxiv.org/abs/1904.11486

### Other

Several (less common) features that I often utilize in my projects are included. Many of their additions are the reason why I maintain my own set of models, instead of using others' via PIP:

* All models have a common default configuration interface and API for
    * accessing/changing the classifier - `get_classifier` and `reset_classifier`
    * doing a forward pass on just the features - `forward_features` (see [documentation](https://huggingface.co/docs/timm/feature_extraction))
    * these make it easy to write consistent network wrappers that work with any of the models
* All models support multi-scale feature map extraction (feature pyramids) via create_model (see [documentation](https://huggingface.co/docs/timm/feature_extraction))
    * `create_model(name, features_only=True, out_indices=..., output_stride=...)`
    * `out_indices` creation arg specifies which feature maps to return, these indices are 0 based and generally correspond to the `C(i + 1)` feature level.
    * `output_stride` creation arg controls output stride of the network by using dilated convolutions. Most networks are stride 32 by default. Not all networks support this.
    * feature map channel counts, reduction level (stride) can be queried AFTER model creation via the `.feature_info` member
* All models have a consistent pretrained weight loader that adapts last linear if necessary, and from 3 to 1 channel input if desired
* High performance [reference training, validation, and inference scripts](https://huggingface.co/docs/timm/training_script) that work in several process/GPU modes:
    * NVIDIA DDP w/ a single GPU per process, multiple processes with APEX present (AMP mixed-precision optional)
    * PyTorch DistributedDataParallel w/ multi-gpu, single process (AMP disabled as it crashes when enabled)
    * PyTorch w/ single GPU single process (AMP optional)
* A dynamic global pool implementation that allows selecting from average pooling, max pooling, average + max, or concat([average, max]) at model creation. All global pooling is adaptive average by default and compatible with pretrained weights.
* A 'Test Time Pool' wrapper that can wrap any of the included models and usually provides improved performance doing inference with input images larger than the training size. Idea adapted from original DPN implementation when I ported (https://github.com/cypw/DPNs)
* Learning rate schedulers
  * Ideas adopted from
     * [AllenNLP schedulers](https://github.com/allenai/allennlp/tree/master/allennlp/training/learning_rate_schedulers)
     * [FAIRseq lr_scheduler](https://github.com/pytorch/fairseq/tree/master/fairseq/optim/lr_scheduler)
     * SGDR: Stochastic Gradient Descent with Warm Restarts (https://arxiv.org/abs/1608.03983)
  * Schedulers include `step`, `cosine` w/ restarts, `tanh` w/ restarts, `plateau`
* Space-to-Depth by [mrT23](https://github.com/mrT23/TResNet/blob/master/src/models/tresnet/layers/space_to_depth.py) (https://arxiv.org/abs/1801.04590)
* Adaptive Gradient Clipping (https://arxiv.org/abs/2102.06171, https://github.com/deepmind/deepmind-research/tree/master/nfnets)
* An extensive selection of channel and/or spatial attention modules:
    * Bottleneck Transformer - https://arxiv.org/abs/2101.11605
    * CBAM - https://arxiv.org/abs/1807.06521
    * Effective Squeeze-Excitation (ESE) - https://arxiv.org/abs/1911.06667
    * Efficient Channel Attention (ECA) - https://arxiv.org/abs/1910.03151
    * Gather-Excite (GE) - https://arxiv.org/abs/1810.12348
    * Global Context (GC) - https://arxiv.org/abs/1904.11492
    * Halo - https://arxiv.org/abs/2103.12731
    * Involution - https://arxiv.org/abs/2103.06255
    * Lambda Layer - https://arxiv.org/abs/2102.08602
    * Non-Local (NL) - https://arxiv.org/abs/1711.07971
    * Squeeze-and-Excitation (SE) - https://arxiv.org/abs/1709.01507
    * Selective Kernel (SK) - https://arxiv.org/abs/1903.06586
    * Split (SPLAT) - https://arxiv.org/abs/2004.08955
    * Shifted Window (SWIN) - https://arxiv.org/abs/2103.14030

## Results

Model validation results can be found in the [results tables](results/README.md)

## Getting Started (Documentation)

The official documentation can be found at https://huggingface.co/docs/hub/timm. Documentation contributions are welcome.

[Getting Started with PyTorch Image Models (timm): A Practitioner’s Guide](https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055-2/) by [Chris Hughes](https://github.com/Chris-hughes10) is an extensive blog post covering many aspects of `timm` in detail.

[timmdocs](http://timm.fast.ai/) is an alternate set of documentation for `timm`. A big thanks to [Aman Arora](https://github.com/amaarora) for his efforts creating timmdocs.

[paperswithcode](https://paperswithcode.com/lib/timm) is a good resource for browsing the models within `timm`.

## Train, Validation, Inference Scripts

The root folder of the repository contains reference train, validation, and inference scripts that work with the included models and other features of this repository. They are adaptable for other datasets and use cases with a little hacking. See [documentation](https://huggingface.co/docs/timm/training_script).

## Awesome PyTorch Resources

One of the greatest assets of PyTorch is the community and their contributions. A few of my favourite resources that pair well with the models and components here are listed below.

### Object Detection, Instance and Semantic Segmentation
* Detectron2 - https://github.com/facebookresearch/detectron2
* Segmentation Models (Semantic) - https://github.com/qubvel/segmentation_models.pytorch
* EfficientDet (Obj Det, Semantic soon) - https://github.com/rwightman/efficientdet-pytorch

### Computer Vision / Image Augmentation
* Albumentations - https://github.com/albumentations-team/albumentations
* Kornia - https://github.com/kornia/kornia

### Knowledge Distillation
* RepDistiller - https://github.com/HobbitLong/RepDistiller
* torchdistill - https://github.com/yoshitomo-matsubara/torchdistill

### Metric Learning
* PyTorch Metric Learning - https://github.com/KevinMusgrave/pytorch-metric-learning

### Training / Frameworks
* fastai - https://github.com/fastai/fastai
* lightly_train - https://github.com/lightly-ai/lightly-train

### Deployment
* timmx (Export timm models to ONNX, CoreML, LiteRT, TensorRT, and more) - https://github.com/Boulaouaney/timmx

## Licenses

### Code
The code here is licensed Apache 2.0. I've taken care to make sure any third party code included or adapted has compatible (permissive) licenses such as MIT, BSD, etc. I've made an effort to avoid any GPL / LGPL conflicts. That said, it is your responsibility to ensure you comply with licenses here and conditions of any dependent licenses. Where applicable, I've linked the sources/references for various components in docstrings. If you think I've missed anything please create an issue.

### Pretrained Weights
So far all of the pretrained weights available here are pretrained on ImageNet with a select few that have some additional pretraining (see extra note below). ImageNet was released for non-commercial research purposes only (https://image-net.org/download). It's not clear what the implications of that are for the use of pretrained weights from that dataset. Any models I have trained with ImageNet are done for research purposes and one should assume that the original dataset license applies to the weights. It's best to seek legal advice if you intend to use the pretrained weights in a commercial product.

#### Pretrained on more than ImageNet
Several weights included or references here were pretrained with proprietary datasets that I do not have access to. These include the Facebook WSL, SSL, SWSL ResNe(Xt) and the Google Noisy Student EfficientNet models. The Facebook models have an explicit non-commercial license (CC-BY-NC 4.0, https://github.com/facebookresearch/semi-supervised-ImageNet1K-models, https://github.com/facebookresearch/WSL-Images). The Google models do not appear to have any restriction beyond the Apache 2.0 license (and ImageNet concerns). In either case, you should contact Facebook or Google with any questions.

## Citing

### BibTeX

```bibtex
@misc{rw2019timm,
  author = {Ross Wightman},
  title = {PyTorch Image Models},
  year = {2019},
  publisher = {GitHub},
  journal = {GitHub repository},
  doi = {10.5281/zenodo.4414861},
  howpublished = {\url{https://github.com/rwightman/pytorch-image-models}}
}
```

### Latest DOI

[![DOI](https://zenodo.org/badge/168799526.svg)](https://zenodo.org/badge/latestdoi/168799526)


================================================
FILE: UPGRADING.md
================================================
# Upgrading from previous versions

I generally try to maintain code interface and especially model weight compatibility across many `timm` versions. Sometimes there are exceptions.

## Checkpoint remapping

Pretrained weight remapping is handled by `checkpoint_filter_fn` in a model implementation module. This remaps old pretrained checkpoints to new, and also 3rd party (original) checkpoints to `timm` format if the model was modified when brought into `timm`.

The `checkpoint_filter_fn` is automatically called when loading pretrained weights via `pretrained=True`, but it can also be invoked manually by calling the fn directly with the current model instance and old state dict.

## Upgrading from 0.6 and earlier

Many changes were made since the 0.6.x stable releases. They were previewed in 0.8.x dev releases but not everyone transitioned.
* `timm.models.layers` moved to `timm.layers`:
  * `from timm.models.layers import name` will still work via deprecation mapping (but please transition to `timm.layers`).
  * `import timm.models.layers.module` or `from timm.models.layers.module import name` needs to be changed now.
* Builder, helper, non-model modules in `timm.models` have a `_` prefix added, ie `timm.models.helpers` -> `timm.models._helpers`, there are temporary deprecation mapping files but those will be removed.
* All models now support `architecture.pretrained_tag` naming (ex `resnet50.rsb_a1`).
  * The pretrained_tag is the specific weight variant (different head) for the architecture.
  * Using only `architecture` defaults to the first weights in the default_cfgs for that model architecture.
  * In adding pretrained tags, many model names that existed to differentiate were renamed to use the tag  (ex: `vit_base_patch16_224_in21k` -> `vit_base_patch16_224.augreg_in21k`). There are deprecation mappings for these.
* A number of models had their checkpoints remapped to match architecture changes needed to better support `features_only=True`, there are `checkpoint_filter_fn` methods in any model module that was remapped. These can be passed to `timm.models.load_checkpoint(..., filter_fn=timm.models.swin_transformer_v2.checkpoint_filter_fn)` to remap your existing checkpoint.
* The Hugging Face Hub (https://huggingface.co/timm) is now the primary source for `timm` weights. Model cards include link to papers, original source, license. 
* Previous 0.6.x can be cloned from [0.6.x](https://github.com/rwightman/pytorch-image-models/tree/0.6.x) branch or installed via pip with version.


================================================
FILE: avg_checkpoints.py
================================================
#!/usr/bin/env python3
""" Checkpoint Averaging Script

This script averages all model weights for checkpoints in specified path that match
the specified filter wildcard. All checkpoints must be from the exact same model.

For any hope of decent results, the checkpoints should be from the same or child
(via resumes) training session. This can be viewed as similar to maintaining running
EMA (exponential moving average) of the model weights or performing SWA (stochastic
weight averaging), but post-training.

Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
"""
import torch
import argparse
import os
import glob
import hashlib
from timm.models import load_state_dict
from timm.models._helpers import _torch_load
try:
    import safetensors.torch
    _has_safetensors = True
except ImportError:
    _has_safetensors = False

# Default output paths for the averaged checkpoint. The safetensors default is
# substituted in main() when --safetensors is passed without an explicit --output.
DEFAULT_OUTPUT = "./averaged.pth"
DEFAULT_SAFE_OUTPUT = "./averaged.safetensors"

# Command-line interface for the checkpoint averaging script.
parser = argparse.ArgumentParser(description='PyTorch Checkpoint Averager')
parser.add_argument('--input', default='', type=str, metavar='PATH',
                    help='path to base input folder containing checkpoints')
parser.add_argument('--filter', default='*.pth.tar', type=str, metavar='WILDCARD',
                    help='checkpoint filter (path wildcard)')
parser.add_argument('--output', default=DEFAULT_OUTPUT, type=str, metavar='PATH',
                    help=f'Output filename. Defaults to {DEFAULT_SAFE_OUTPUT} when passing --safetensors.')
# Negative flags below are inverted in main() into args.use_ema / args.sort.
parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',
                    help='Force not using ema version of weights (if present)')
parser.add_argument('--no-sort', dest='no_sort', action='store_true',
                    help='Do not sort and select by checkpoint metric, also makes "n" argument irrelevant')
parser.add_argument('-n', type=int, default=10, metavar='N',
                    help='Number of checkpoints to average')
parser.add_argument('--safetensors', action='store_true',
                    help='Save weights using safetensors instead of the default torch way (pickle).')


def checkpoint_metric(checkpoint_path):
    """Extract the tracked eval metric value from a checkpoint file.

    Args:
        checkpoint_path: Path to a checkpoint file.

    Returns:
        The metric value stored in the checkpoint, or None when the path is
        empty/missing or the checkpoint contains no recognizable metric.
    """
    # Return None (not {}) for a missing path so callers' `metric is not None`
    # checks correctly skip checkpoints without a usable metric; an empty dict
    # would slip through and break sorting against numeric metrics.
    if not checkpoint_path or not os.path.isfile(checkpoint_path):
        return None
    print("=> Extracting metric from checkpoint '{}'".format(checkpoint_path))
    checkpoint = _torch_load(checkpoint_path, map_location='cpu', weights_only=True)
    metric = None
    if 'metric' in checkpoint:
        # Newer checkpoints store the value directly under 'metric'.
        metric = checkpoint['metric']
    elif 'metrics' in checkpoint and 'metric_name' in checkpoint:
        # Older checkpoints store a dict of metrics keyed by 'metric_name'.
        metrics = checkpoint['metrics']
        print(metrics)
        metric = metrics[checkpoint['metric_name']]
    return metric


def main():
    """Average the weights of matching checkpoints and save the result.

    Workflow: parse args, resolve the output path, glob for candidate
    checkpoints, optionally sort/select the top-n by stored metric, then
    accumulate all weights in float64 and write the float32 average.
    """
    args = parser.parse_args()
    # by default use the EMA weights (if present)
    args.use_ema = not args.no_use_ema
    # by default sort by checkpoint metric (if present) and avg top n checkpoints
    args.sort = not args.no_sort

    if args.safetensors and args.output == DEFAULT_OUTPUT:
        # Default path changes if using safetensors
        args.output = DEFAULT_SAFE_OUTPUT

    # Ensure the output filename carries an extension matching the save format.
    output, output_ext = os.path.splitext(args.output)
    if not output_ext:
        output_ext = ('.safetensors' if args.safetensors else '.pth')
    output = output + output_ext

    if args.safetensors and not output_ext == ".safetensors":
        print(
            "Warning: saving weights as safetensors but output file extension is not "
            f"set to '.safetensors': {args.output}"
        )

    if os.path.exists(output):
        print("Error: Output filename ({}) already exists.".format(output))
        exit(1)

    # Build the glob pattern from the input folder and the filter wildcard,
    # inserting a path separator unless one side already provides it.
    pattern = args.input
    if not args.input.endswith(os.path.sep) and not args.filter.startswith(os.path.sep):
        pattern += os.path.sep
    pattern += args.filter
    checkpoints = glob.glob(pattern, recursive=True)

    if args.sort:
        # Keep only checkpoints that have a recorded metric, then take top-n.
        checkpoint_metrics = []
        for c in checkpoints:
            metric = checkpoint_metric(c)
            if metric is not None:
                checkpoint_metrics.append((metric, c))
        checkpoint_metrics = list(sorted(checkpoint_metrics))
        checkpoint_metrics = checkpoint_metrics[-args.n:]
        if checkpoint_metrics:
            print("Selected checkpoints:")
            for m, c in checkpoint_metrics:
                print(m, c)
        avg_checkpoints = [c for m, c in checkpoint_metrics]
    else:
        avg_checkpoints = checkpoints
        if avg_checkpoints:
            print("Selected checkpoints:")
            for c in checkpoints:
                print(c)

    if not avg_checkpoints:
        print('Error: No checkpoints found to average.')
        exit(1)

    # Accumulate weights in float64 to minimize rounding error while summing.
    avg_state_dict = {}
    avg_counts = {}
    for c in avg_checkpoints:
        new_state_dict = load_state_dict(c, args.use_ema)
        if not new_state_dict:
            print(f"Error: Checkpoint ({c}) doesn't exist")
            continue
        for k, v in new_state_dict.items():
            if k not in avg_state_dict:
                avg_state_dict[k] = v.clone().to(dtype=torch.float64)
                avg_counts[k] = 1
            else:
                avg_state_dict[k] += v.to(dtype=torch.float64)
                avg_counts[k] += 1

    # Divide each summed tensor by the number of checkpoints that contained it.
    for k, v in avg_state_dict.items():
        v.div_(avg_counts[k])

    # float32 overflow seems unlikely based on weights seen to date, but who knows
    float32_info = torch.finfo(torch.float32)
    final_state_dict = {}
    for k, v in avg_state_dict.items():
        v = v.clamp(float32_info.min, float32_info.max)
        final_state_dict[k] = v.to(dtype=torch.float32)

    if args.safetensors:
        assert _has_safetensors, "`pip install safetensors` to use .safetensors"
        safetensors.torch.save_file(final_state_dict, output)
    else:
        torch.save(final_state_dict, output)

    # Hash the saved file so the artifact can be verified / tagged by digest.
    with open(output, 'rb') as f:
        sha_hash = hashlib.sha256(f.read()).hexdigest()
    print(f"=> Saved state_dict to '{output}', SHA256: {sha_hash}")


if __name__ == '__main__':
    main()


================================================
FILE: benchmark.py
================================================
#!/usr/bin/env python3
""" Model Benchmark Script

An inference and train step benchmark script for timm models.

Hacked together by Ross Wightman (https://github.com/rwightman)
"""
import argparse
import csv
import json
import logging
import time
from collections import OrderedDict
from contextlib import suppress
from functools import partial

import torch
import torch.nn as nn
import torch.nn.parallel

from timm.data import resolve_data_config
from timm.layers import set_fast_norm
from timm.models import create_model, is_model, list_models
from timm.optim import create_optimizer_v2
from timm.utils import setup_default_logging, set_jit_fuser, decay_batch_step, check_batch_size_retry, ParseKwargs,\
    reparameterize_model

# Optional dependency probes: each profiler/compiler backend is only used when its
# import succeeds; the has_* flags gate the corresponding code paths below.
try:
    from deepspeed.profiling.flops_profiler import get_model_profile
    has_deepspeed_profiling = True
except ImportError as e:
    has_deepspeed_profiling = False

try:
    from fvcore.nn import FlopCountAnalysis, flop_count_str, ActivationCountAnalysis
    has_fvcore_profiling = True
except ImportError as e:
    FlopCountAnalysis = None  # referenced by profile_fvcore(); None when fvcore missing
    has_fvcore_profiling = False

try:
    from functorch.compile import memory_efficient_fusion
    has_functorch = True
except ImportError as e:
    has_functorch = False

# torch.compile() only exists in PyTorch >= 2.0
has_compile = hasattr(torch, 'compile')

if torch.cuda.is_available():
    # allow TF32 matmuls and enable the cudnn autotuner for benchmark throughput
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.benchmark = True
# NOTE(review): logger name 'validate' appears copied from validate.py; possibly intended 'benchmark'
_logger = logging.getLogger('validate')


# Top-level CLI definition; parsed once in main(). The parsed namespace is copied via
# vars(args) in benchmark() and forwarded as kwargs to the BenchmarkRunner subclasses.
parser = argparse.ArgumentParser(description='PyTorch Benchmark')

# benchmark specific args
parser.add_argument('--model-list', metavar='NAME', default='',
                    help='txt file based list of model names to benchmark')
parser.add_argument('--bench', default='both', type=str,
                    help="Benchmark mode. One of 'inference', 'train', 'both'. Defaults to 'both'")
parser.add_argument('--detail', action='store_true', default=False,
                    help='Provide train fwd/bwd/opt breakdown detail if True. Defaults to False')
parser.add_argument('--no-retry', action='store_true', default=False,
                    help='Do not decay batch size and retry on error.')
parser.add_argument('--results-file', default='', type=str,
                    help='Output csv file for validation results (summary)')
parser.add_argument('--results-format', default='csv', type=str,
                    help='Format for results file one of (csv, json) (default: csv).')
parser.add_argument('--num-warm-iter', default=10, type=int,
                    help='Number of warmup iterations (default: 10)')
parser.add_argument('--num-bench-iter', default=40, type=int,
                    help='Number of benchmark iterations (default: 40)')
parser.add_argument('--device', default='cuda', type=str,
                    help="device to run benchmark on")

# common inference / train args
parser.add_argument('--model', '-m', metavar='NAME', default='resnet50',
                    help='model architecture (default: resnet50)')
parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N', help='mini-batch size (default: 256)')
parser.add_argument('--img-size', default=None, type=int,
                    metavar='N', help='Input image dimension, uses model default if empty')
parser.add_argument('--input-size', default=None, nargs=3, type=int, metavar='N',
                    help='Input all image dimensions (d h w, e.g. --input-size 3 224 224), uses model default if empty')
parser.add_argument('--use-train-size', action='store_true', default=False,
                    help='Run inference at train size, not test-input-size if it exists.')
parser.add_argument('--num-classes', type=int, default=None,
                    help='Number classes in dataset')
parser.add_argument('--gp', default=None, type=str, metavar='POOL',
                    help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.')
parser.add_argument('--channels-last', action='store_true', default=False,
                    help='Use channels_last memory layout')
parser.add_argument('--grad-checkpointing', action='store_true', default=False,
                    help='Enable gradient checkpointing through model blocks/stages')
parser.add_argument('--amp', action='store_true', default=False,
                    help='use PyTorch Native AMP for mixed precision training. Overrides --precision arg.')
parser.add_argument('--amp-dtype', default='float16', type=str,
                    help='lower precision AMP dtype (default: float16). Overrides --precision arg if args.amp True.')
# NOTE(review): help mentions 'tf32' but resolve_precision() only accepts
# (amp, amp_bfloat16, float16, bfloat16, float32) — confirm intended set.
parser.add_argument('--precision', default='float32', type=str,
                    help='Numeric precision. One of (amp, float32, float16, bfloat16, tf32)')
parser.add_argument('--fuser', default='', type=str,
                    help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')")
parser.add_argument('--fast-norm', default=False, action='store_true',
                    help='enable experimental fast-norm')
parser.add_argument('--reparam', default=False, action='store_true',
                    help='Reparameterize model')
parser.add_argument('--model-kwargs', nargs='*', default={}, action=ParseKwargs)
parser.add_argument('--torchcompile-mode', type=str, default=None,
                    help="torch.compile mode (default: None).")

# codegen (model compilation) options — mutually exclusive, at most one may be set
scripting_group = parser.add_mutually_exclusive_group()
scripting_group.add_argument('--torchscript', dest='torchscript', action='store_true',
                             help='convert model torchscript for inference')
scripting_group.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor',
                             help="Enable compilation w/ specified backend (default: inductor).")
scripting_group.add_argument('--aot-autograd', default=False, action='store_true',
                             help="Enable AOT Autograd optimization.")

# train optimizer parameters
# NOTE(review): help string below is missing its closing paren — cosmetic only.
parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER',
                    help='Optimizer (default: "sgd"')
parser.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON',
                    help='Optimizer Epsilon (default: None, use opt default)')
parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA',
                    help='Optimizer Betas (default: None, use opt default)')
parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                    help='Optimizer momentum (default: 0.9)')
parser.add_argument('--weight-decay', type=float, default=0.0001,
                    help='weight decay (default: 0.0001)')
parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM',
                    help='Clip gradient norm (default: None, no clipping)')
parser.add_argument('--clip-mode', type=str, default='norm',
                    help='Gradient clipping mode. One of ("norm", "value", "agc")')


# model regularization / loss params that impact model or loss fn
parser.add_argument('--smoothing', type=float, default=0.1,
                    help='Label smoothing (default: 0.1)')
parser.add_argument('--drop', type=float, default=0.0, metavar='PCT',
                    help='Dropout rate (default: 0.)')
parser.add_argument('--drop-path', type=float, default=None, metavar='PCT',
                    help='Drop path rate (default: None)')
parser.add_argument('--drop-block', type=float, default=None, metavar='PCT',
                    help='Drop block rate (default: None)')


def timestamp(sync=False):
    # CPU timer. `sync` is accepted but ignored so the signature matches
    # cuda_timestamp() and the two are interchangeable as self.time_fn.
    return time.perf_counter()


def cuda_timestamp(sync=False, device=None):
    # CUDA-aware timer: when sync=True, wait for all queued GPU work on `device`
    # to finish before reading the clock, so deltas reflect actual GPU time.
    if sync:
        torch.cuda.synchronize(device=device)
    return time.perf_counter()


def count_params(model: nn.Module):
    """Return the total number of elements across all parameters of `model`."""
    # Generator expression avoids materializing an intermediate list of sizes.
    return sum(p.numel() for p in model.parameters())


def resolve_precision(precision: str):
    """Map a precision mode string to an (amp_dtype, model_dtype, data_dtype) triple.

    amp_dtype is None unless an autocast ('amp*') mode is selected; for the pure
    float16/bfloat16 modes the model and data dtypes themselves are lowered instead.
    """
    assert precision in ('amp', 'amp_bfloat16', 'float16', 'bfloat16', 'float32')
    if precision == 'amp':
        return torch.float16, torch.float32, torch.float32
    if precision == 'amp_bfloat16':
        return torch.bfloat16, torch.float32, torch.float32
    if precision == 'float16':
        return None, torch.float16, torch.float16
    if precision == 'bfloat16':
        return None, torch.bfloat16, torch.bfloat16
    # float32: amp disabled, full precision everywhere
    return None, torch.float32, torch.float32


def profile_deepspeed(model, input_size=(3, 224, 224), batch_size=1, detailed=False):
    """Count MACs with deepspeed's flops profiler.

    Returns (macs, 0): deepspeed does not report an activation count, so the second
    element is always 0 to match profile_fvcore's return shape.
    """
    _, macs, _ = get_model_profile(
        model=model,
        input_shape=(batch_size,) + input_size,  # input shape/resolution
        print_profile=detailed,  # prints the model graph with the measured profile attached to each module
        detailed=detailed,  # print the detailed profile
        warm_up=10,  # the number of warm-ups before measuring the time of each module
        as_string=False,  # print raw numbers (e.g. 1000) or as human-readable strings (e.g. 1k)
        output_file=None,  # path to the output file. If None, the profiler prints to stdout.
        ignore_modules=None)  # the list of modules to ignore in the profiling
    return macs, 0  # no activation count in DS


def profile_fvcore(model, input_size=(3, 224, 224), batch_size=1, detailed=False, force_cpu=False):
    """Count FLOPs and activations with fvcore; returns (total_flops, total_activations)."""
    if force_cpu:
        model = model.to('cpu')
    # Build an example batch matching the model's current device/dtype.
    param = next(model.parameters())
    sample = torch.ones((batch_size,) + input_size, device=param.device, dtype=param.dtype)
    flop_analysis = FlopCountAnalysis(model, sample)
    act_analysis = ActivationCountAnalysis(model, sample)
    if detailed:
        print(flop_count_str(flop_analysis))
    return flop_analysis.total(), act_analysis.total()


class BenchmarkRunner:
    """Common benchmark setup shared by the inference/train/profile runners.

    Creates the model, resolves numeric precision and input size, moves the model to
    the target device/dtype/memory-format, and optionally compiles it (torchscript,
    torch.compile, or AOT autograd). Subclasses implement run().
    """

    def __init__(
            self,
            model_name,
            detail=False,
            device='cuda',
            torchscript=False,
            torchcompile=None,
            torchcompile_mode=None,
            aot_autograd=False,
            reparam=False,
            precision='float32',
            fuser='',
            num_warm_iter=10,
            num_bench_iter=50,
            use_train_size=False,
            **kwargs
    ):
        self.model_name = model_name
        self.detail = detail
        self.device = device
        self.amp_dtype, self.model_dtype, self.data_dtype = resolve_precision(precision)
        self.channels_last = kwargs.pop('channels_last', False)
        if self.amp_dtype is not None:
            # amp mode selected: wrap forward passes in autocast at the resolved dtype
            self.amp_autocast = partial(torch.amp.autocast, device_type=device, dtype=self.amp_dtype)
        else:
            # no-op context manager when amp is disabled
            self.amp_autocast = suppress

        if fuser:
            set_jit_fuser(fuser)
        # NOTE: the kwargs.pop() calls below intentionally mutate kwargs so the
        # leftovers can later be fed to resolve_data_config() without clashes.
        self.model = create_model(
            model_name,
            num_classes=kwargs.pop('num_classes', None),
            in_chans=3,
            global_pool=kwargs.pop('gp', 'fast'),
            scriptable=torchscript,
            drop_rate=kwargs.pop('drop', 0.),
            drop_path_rate=kwargs.pop('drop_path', None),
            drop_block_rate=kwargs.pop('drop_block', None),
            **kwargs.pop('model_kwargs', {}),
        )
        if reparam:
            # fuse re-parameterizable branches for a faster inference-time model
            self.model = reparameterize_model(self.model)
        self.model.to(
            device=self.device,
            dtype=self.model_dtype,
            memory_format=torch.channels_last if self.channels_last else None,
        )
        self.num_classes = self.model.num_classes
        self.param_count = count_params(self.model)
        _logger.info('Model %s created, param count: %d' % (model_name, self.param_count))

        data_config = resolve_data_config(kwargs, model=self.model, use_test_size=not use_train_size)
        self.input_size = data_config['input_size']
        self.batch_size = kwargs.pop('batch_size', 256)

        # self.compiled gates the profiling fallback later (profilers can't trace
        # scripted/compiled graphs)
        self.compiled = False
        if torchscript:
            self.model = torch.jit.script(self.model)
            self.compiled = True
        elif torchcompile:
            assert has_compile, 'A version of torch w/ torch.compile() is required, possibly a nightly.'
            torch._dynamo.reset()
            self.model = torch.compile(self.model, backend=torchcompile, mode=torchcompile_mode)
            self.compiled = True
        elif aot_autograd:
            assert has_functorch, "functorch is needed for --aot-autograd"
            self.model = memory_efficient_fusion(self.model)
            self.compiled = True

        self.example_inputs = None
        self.num_warm_iter = num_warm_iter
        self.num_bench_iter = num_bench_iter
        self.log_freq = num_bench_iter // 5
        if 'cuda' in self.device:
            # CUDA timer synchronizes the device before reading the clock
            self.time_fn = partial(cuda_timestamp, device=self.device)
        else:
            self.time_fn = timestamp

    def _init_input(self):
        # Generate a random batch at the resolved input size/dtype on the target device.
        self.example_inputs = torch.randn(
            (self.batch_size,) + self.input_size, device=self.device, dtype=self.data_dtype)
        if self.channels_last:
            self.example_inputs = self.example_inputs.contiguous(memory_format=torch.channels_last)


class InferenceBenchmarkRunner(BenchmarkRunner):
    """Measures forward-pass throughput (samples/sec) and per-step latency in eval mode."""

    def __init__(
            self,
            model_name,
            device='cuda',
            torchscript=False,
            **kwargs
    ):
        super().__init__(model_name=model_name, device=device, torchscript=torchscript, **kwargs)
        self.model.eval()

    def run(self):
        """Run warmup then timed forward passes; return a dict of summary metrics."""
        def _step():
            # one forward pass; time_fn(True) synchronizes CUDA before reading the clock
            t_step_start = self.time_fn()
            with self.amp_autocast():
                output = self.model(self.example_inputs)
            t_step_end = self.time_fn(True)
            return t_step_end - t_step_start

        _logger.info(
            f'Running inference benchmark on {self.model_name} for {self.num_bench_iter} steps w/ '
            f'input size {self.input_size} and batch size {self.batch_size}.')

        with torch.inference_mode():
            self._init_input()

            for _ in range(self.num_warm_iter):
                _step()

            total_step = 0.
            num_samples = 0
            t_run_start = self.time_fn()
            for i in range(self.num_bench_iter):
                delta_fwd = _step()
                total_step += delta_fwd
                num_samples += self.batch_size
                num_steps = i + 1
                if num_steps % self.log_freq == 0:
                    _logger.info(
                        f"Infer [{num_steps}/{self.num_bench_iter}]."
                        f" {num_samples / total_step:0.2f} samples/sec."
                        f" {1000 * total_step / num_steps:0.3f} ms/step.")
            t_run_end = self.time_fn(True)
            t_run_elapsed = t_run_end - t_run_start

        results = dict(
            samples_per_sec=round(num_samples / t_run_elapsed, 2),
            step_time=round(1000 * total_step / self.num_bench_iter, 3),
            batch_size=self.batch_size,
            img_size=self.input_size[-1],
            param_count=round(self.param_count / 1e6, 2),  # millions of params
        )

        retries = 0 if self.compiled else 2  # skip profiling if model is scripted
        # Best-effort MAC/activation profiling; second fvcore attempt forces CPU.
        # NOTE(review): there is no break after a successful profile, so when both
        # iterations run the CPU pass overwrites the first result — confirm intended.
        while retries:
            retries -= 1
            try:
                if has_deepspeed_profiling:
                    macs, _ = profile_deepspeed(self.model, self.input_size)
                    results['gmacs'] = round(macs / 1e9, 2)
                elif has_fvcore_profiling:
                    macs, activations = profile_fvcore(self.model, self.input_size, force_cpu=not retries)
                    results['gmacs'] = round(macs / 1e9, 2)
                    results['macts'] = round(activations / 1e6, 2)
            except RuntimeError as e:
                # profiling failures are non-fatal; timing results remain valid
                pass

        _logger.info(
            f"Inference benchmark of {self.model_name} done. "
            f"{results['samples_per_sec']:.2f} samples/sec, {results['step_time']:.2f} ms/step")

        return results


class TrainBenchmarkRunner(BenchmarkRunner):
    """Measures full train-step throughput: forward + loss + backward + optimizer step.

    With detail=True (set via base-class `detail`), forward/backward/optimizer
    phases are timed separately.
    """

    def __init__(
            self,
            model_name,
            device='cuda',
            torchscript=False,
            **kwargs
    ):
        super().__init__(model_name=model_name, device=device, torchscript=torchscript, **kwargs)
        self.model.train()

        self.loss = nn.CrossEntropyLoss().to(self.device)
        self.target_shape = tuple()  # scalar class target per sample

        self.optimizer = create_optimizer_v2(
            self.model,
            opt=kwargs.pop('opt', 'sgd'),
            lr=kwargs.pop('lr', 1e-4))

        if kwargs.pop('grad_checkpointing', False):
            self.model.set_grad_checkpointing()

    def _gen_target(self, batch_size):
        # Random integer class targets in [0, num_classes) for the loss fn.
        return torch.empty(
            (batch_size,) + self.target_shape, device=self.device, dtype=torch.long).random_(self.num_classes)

    def run(self):
        """Run warmup then timed train steps; return a dict of summary metrics."""
        def _step(detail=False):
            self.optimizer.zero_grad()  # can this be ignored?
            t_start = self.time_fn()
            t_fwd_end = t_start
            t_bwd_end = t_start
            with self.amp_autocast():
                output = self.model(self.example_inputs)
                if isinstance(output, tuple):
                    # some models return (logits, aux); time/loss only the first
                    output = output[0]
                if detail:
                    t_fwd_end = self.time_fn(True)
                target = self._gen_target(output.shape[0])
                self.loss(output, target).backward()
                if detail:
                    t_bwd_end = self.time_fn(True)
            self.optimizer.step()
            t_end = self.time_fn(True)
            if detail:
                # per-phase deltas: forward, backward, optimizer
                delta_fwd = t_fwd_end - t_start
                delta_bwd = t_bwd_end - t_fwd_end
                delta_opt = t_end - t_bwd_end
                return delta_fwd, delta_bwd, delta_opt
            else:
                delta_step = t_end - t_start
                return delta_step

        _logger.info(
            f'Running train benchmark on {self.model_name} for {self.num_bench_iter} steps w/ '
            f'input size {self.input_size} and batch size {self.batch_size}.')

        self._init_input()

        for _ in range(self.num_warm_iter):
            _step()

        t_run_start = self.time_fn()
        if self.detail:
            # detailed mode: accumulate fwd/bwd/opt phase times separately
            total_fwd = 0.
            total_bwd = 0.
            total_opt = 0.
            num_samples = 0
            for i in range(self.num_bench_iter):
                delta_fwd, delta_bwd, delta_opt = _step(True)
                num_samples += self.batch_size
                total_fwd += delta_fwd
                total_bwd += delta_bwd
                total_opt += delta_opt
                num_steps = (i + 1)
                if num_steps % self.log_freq == 0:
                    total_step = total_fwd + total_bwd + total_opt
                    _logger.info(
                        f"Train [{num_steps}/{self.num_bench_iter}]."
                        f" {num_samples / total_step:0.2f} samples/sec."
                        f" {1000 * total_fwd / num_steps:0.3f} ms/step fwd,"
                        f" {1000 * total_bwd / num_steps:0.3f} ms/step bwd,"
                        f" {1000 * total_opt / num_steps:0.3f} ms/step opt."
                    )
            total_step = total_fwd + total_bwd + total_opt
            t_run_elapsed = self.time_fn() - t_run_start
            results = dict(
                samples_per_sec=round(num_samples / t_run_elapsed, 2),
                step_time=round(1000 * total_step / self.num_bench_iter, 3),
                fwd_time=round(1000 * total_fwd / self.num_bench_iter, 3),
                bwd_time=round(1000 * total_bwd / self.num_bench_iter, 3),
                opt_time=round(1000 * total_opt / self.num_bench_iter, 3),
                batch_size=self.batch_size,
                img_size=self.input_size[-1],
                param_count=round(self.param_count / 1e6, 2),
            )
        else:
            total_step = 0.
            num_samples = 0
            for i in range(self.num_bench_iter):
                delta_step = _step(False)
                num_samples += self.batch_size
                total_step += delta_step
                num_steps = (i + 1)
                if num_steps % self.log_freq == 0:
                    _logger.info(
                        f"Train [{num_steps}/{self.num_bench_iter}]."
                        f" {num_samples / total_step:0.2f} samples/sec."
                        f" {1000 * total_step / num_steps:0.3f} ms/step.")
            t_run_elapsed = self.time_fn() - t_run_start
            results = dict(
                samples_per_sec=round(num_samples / t_run_elapsed, 2),
                step_time=round(1000 * total_step / self.num_bench_iter, 3),
                batch_size=self.batch_size,
                img_size=self.input_size[-1],
                param_count=round(self.param_count / 1e6, 2),
            )

        # NOTE(review): value logged is per-step time but label reads 'ms/sample' — confirm intended.
        _logger.info(
            f"Train benchmark of {self.model_name} done. "
            f"{results['samples_per_sec']:.2f} samples/sec, {results['step_time']:.2f} ms/sample")

        return results


class ProfileRunner(BenchmarkRunner):
    """Runner that only counts MACs/activations via deepspeed or fvcore — no timing."""

    def __init__(self, model_name, device='cuda', profiler='', **kwargs):
        super().__init__(model_name=model_name, device=device, **kwargs)
        if not profiler:
            # no explicit choice: prefer deepspeed, fall back to fvcore
            if has_deepspeed_profiling:
                profiler = 'deepspeed'
            elif has_fvcore_profiling:
                profiler = 'fvcore'
        assert profiler, "One of deepspeed or fvcore needs to be installed for profiling to work."
        self.profiler = profiler
        self.model.eval()

    def run(self):
        """Profile the model once and return gmacs/macts summary metrics."""
        _logger.info(
            f'Running profiler on {self.model_name} w/ '
            f'input size {self.input_size} and batch size {self.batch_size}.')

        macs, acts = 0, 0
        if self.profiler == 'deepspeed':
            macs, _ = profile_deepspeed(self.model, self.input_size, batch_size=self.batch_size, detailed=True)
        elif self.profiler == 'fvcore':
            macs, acts = profile_fvcore(self.model, self.input_size, batch_size=self.batch_size, detailed=True)

        results = {
            'gmacs': round(macs / 1e9, 2),
            'macts': round(acts / 1e6, 2),
            'batch_size': self.batch_size,
            'img_size': self.input_size[-1],
            'param_count': round(self.param_count / 1e6, 2),
        }

        _logger.info(
            f"Profile of {self.model_name} done. "
            f"{results['gmacs']:.2f} GMACs, {results['param_count']:.2f} M params.")

        return results


def _try_run(
        model_name,
        bench_fn,
        bench_kwargs,
        initial_batch_size,
        no_batch_size_retry=False
):
    batch_size = initial_batch_size
    results = dict()
    error_str = 'Unknown'
    while batch_size:
        try:
            torch.cuda.empty_cache()
            bench = bench_fn(model_name=model_name, batch_size=batch_size, **bench_kwargs)
            results = bench.run()
            return results
        except RuntimeError as e:
            error_str = str(e)
            _logger.error(f'"{error_str}" while running benchmark.')
            if not check_batch_size_retry(error_str):
                _logger.error(f'Unrecoverable error encountered while benchmarking {model_name}, skipping.')
                break
            if no_batch_size_retry:
                break
        batch_size = decay_batch_step(batch_size)
        _logger.warning(f'Reducing batch size to {batch_size} for retry.')
    results['error'] = error_str
    return results


def benchmark(args):
    """Run the configured benchmark mode(s) for a single model; return a results dict."""
    if args.amp:
        _logger.warning("Overriding precision to 'amp' since --amp flag set.")
        args.precision = 'amp' if args.amp_dtype == 'float16' else '_'.join(['amp', args.amp_dtype])
    _logger.info(f'Benchmarking in {args.precision} precision. '
                 f'{"NHWC" if args.channels_last else "NCHW"} layout. '
                 f'torchscript {"enabled" if args.torchscript else "disabled"}')

    # remaining CLI args are forwarded to the runner constructors as kwargs
    bench_kwargs = vars(args).copy()
    bench_kwargs.pop('amp')
    model = bench_kwargs.pop('model')
    batch_size = bench_kwargs.pop('batch_size')

    bench_fns = (InferenceBenchmarkRunner,)
    prefixes = ('infer',)
    if args.bench == 'both':
        bench_fns = (
            InferenceBenchmarkRunner,
            TrainBenchmarkRunner
        )
        prefixes = ('infer', 'train')
    elif args.bench == 'train':
        bench_fns = TrainBenchmarkRunner,
        prefixes = 'train',
    elif args.bench.startswith('profile'):
        # specific profiler used if included in bench mode string, otherwise default to deepspeed, fallback to fvcore
        if 'deepspeed' in args.bench:
            assert has_deepspeed_profiling, "deepspeed must be installed to use deepspeed flop counter"
            bench_kwargs['profiler'] = 'deepspeed'
        elif 'fvcore' in args.bench:
            assert has_fvcore_profiling, "fvcore must be installed to use fvcore flop counter"
            bench_kwargs['profiler'] = 'fvcore'
        bench_fns = ProfileRunner,
        batch_size = 1  # profiling needs only a single sample

    model_results = OrderedDict(model=model)
    for prefix, bench_fn in zip(prefixes, bench_fns):
        run_results = _try_run(
            model,
            bench_fn,
            bench_kwargs=bench_kwargs,
            initial_batch_size=batch_size,
            no_batch_size_retry=args.no_retry,
        )
        # namespace metric keys per bench mode, e.g. 'infer_samples_per_sec'
        if prefix and 'error' not in run_results:
            run_results = {'_'.join([prefix, k]): v for k, v in run_results.items()}
        model_results.update(run_results)
        if 'error' in run_results:
            break
    if 'error' not in model_results:
        # Collapse duplicated per-mode param counts into a single 'param_count' key.
        # NOTE: both pop() arguments evaluate eagerly, so 'train_param_count' is always
        # removed here even when 'infer_param_count' exists; the pop below is a no-op safeguard.
        param_count = model_results.pop('infer_param_count', model_results.pop('train_param_count', 0))
        model_results.setdefault('param_count', param_count)
        model_results.pop('train_param_count', 0)
    return model_results


def main():
    """CLI entry point: benchmark one model, a wildcard-matched set, or a model-list file."""
    setup_default_logging()
    args = parser.parse_args()
    model_cfgs = []
    model_names = []

    if args.fast_norm:
        set_fast_norm()

    if args.model_list:
        # txt file with one model name per line; clears --model so the list takes precedence
        args.model = ''
        with open(args.model_list) as f:
            model_names = [line.rstrip() for line in f]
        model_cfgs = [(n, None) for n in model_names]
    elif args.model == 'all':
        # validate all models in a list of names with pretrained checkpoints
        args.pretrained = True
        model_names = list_models(pretrained=True, exclude_filters=['*in21k'])
        model_cfgs = [(n, None) for n in model_names]
    elif not is_model(args.model):
        # model name doesn't exist, try as wildcard filter
        model_names = list_models(args.model)
        model_cfgs = [(n, None) for n in model_names]

    if len(model_cfgs):
        # bulk mode: benchmark each model in turn, then sort by a throughput metric
        _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
        results = []
        try:
            for m, _ in model_cfgs:
                if not m:
                    continue
                args.model = m
                r = benchmark(args)
                if r:
                    results.append(r)
                time.sleep(10)  # cool-down pause between models
        except KeyboardInterrupt as e:
            # Ctrl-C: stop the sweep but still report/save partial results
            pass
        sort_key = 'infer_samples_per_sec'
        if 'train' in args.bench:
            sort_key = 'train_samples_per_sec'
        elif 'profile' in args.bench:
            sort_key = 'infer_gmacs'
        # drop entries that errored before producing the sort metric
        results = filter(lambda x: sort_key in x, results)
        results = sorted(results, key=lambda x: x[sort_key], reverse=True)
    else:
        results = benchmark(args)

    if args.results_file:
        write_results(args.results_file, results, format=args.results_format)

    # output results in JSON to stdout w/ delimiter for runner script
    print(f'--result\n{json.dumps(results, indent=4)}')


def write_results(results_file, results, format='csv'):
    """Write benchmark results to `results_file` as CSV (default) or JSON.

    Args:
        results_file: destination path; overwritten if it already exists.
        results: a dict or list of dicts; a single dict is wrapped in a list for CSV.
        format: 'json' for a JSON dump, anything else writes CSV with a header row
            derived from the first result's keys.
    """
    if format == 'json':
        with open(results_file, mode='w') as cf:
            json.dump(results, cf, indent=4)
        return
    if not isinstance(results, (list, tuple)):
        results = [results]
    if not results:
        # preserve prior behavior: still create/truncate the file when there is nothing to write
        open(results_file, mode='w').close()
        return
    # newline='' prevents the csv module from emitting blank lines on Windows
    with open(results_file, mode='w', newline='') as cf:
        dw = csv.DictWriter(cf, fieldnames=results[0].keys())
        dw.writeheader()
        dw.writerows(results)


# Standard CLI entry point for the benchmark script.
if __name__ == '__main__':
    main()


================================================
FILE: bulk_runner.py
================================================
#!/usr/bin/env python3
""" Bulk Model Script Runner

Run validation or benchmark script in separate process for each model

Benchmark all 'vit*' models:
python bulk_runner.py  --model-list 'vit*' --results-file vit_bench.csv benchmark.py --amp -b 512

Validate all models:
python bulk_runner.py  --model-list all --results-file val.csv --pretrained validate.py --data-dir /imagenet/validation/ --amp -b 512 --retry

Hacked together by Ross Wightman (https://github.com/rwightman)
"""
import argparse
import os
import sys
import csv
import json
import subprocess
import time
from typing import Callable, List, Tuple, Union


from timm.models import is_model, list_models, get_pretrained_cfg, get_arch_pretrained_cfgs


# Top-level CLI for the bulk runner; everything after the positional `script`
# argument is captured by REMAINDER and passed through to each subprocess.
parser = argparse.ArgumentParser(description='Per-model process launcher')

# model and results args
parser.add_argument(
    '--model-list', metavar='NAME', default='',
    help='txt file based list of model names to benchmark')
parser.add_argument(
    '--results-file', default='', type=str, metavar='FILENAME',
    help='Output csv file for validation results (summary)')
parser.add_argument(
    '--sort-key', default='', type=str, metavar='COL',
    help='Specify sort key for results csv')
parser.add_argument(
    "--pretrained", action='store_true',
    help="only run models with pretrained weights")

parser.add_argument(
    "--delay",
    type=float,
    default=0,
    help="Interval, in seconds, to delay between model invocations.",
)
parser.add_argument(
    "--start_method", type=str, default="spawn", choices=["spawn", "fork", "forkserver"],
    help="Multiprocessing start method to use when creating workers.",
)
# NOTE(review): --no_python and --module lack action='store_true', so each consumes
# a value on the command line; they are used as booleans in cmd_from_args() — confirm intended.
parser.add_argument(
    "--no_python",
    help="Skip prepending the script with 'python' - just execute it directly. Useful "
         "when the script is not a Python script.",
)
parser.add_argument(
    "-m",
    "--module",
    help="Change each process to interpret the launch script as a Python module, executing "
         "with the same behavior as 'python -m'.",
)

# positional
parser.add_argument(
    "script", type=str,
    help="Full path to the program/script to be launched for each model config.",
)
parser.add_argument("script_args", nargs=argparse.REMAINDER)


def cmd_from_args(args) -> Tuple[Union[Callable, str], List[str]]:
    """Build the (executable, argv) pair used to launch the per-model subprocess.

    By default the script is run under the current Python interpreter (or
    $PYTHON_EXEC) with unbuffered output; --no_python executes it directly.
    """
    cmd: Union[Callable, str]
    cmd_args: List[str] = []
    if args.no_python:
        if args.module:
            raise ValueError(
                "Don't use both the '--no_python' flag"
                " and the '--module' flag at the same time."
            )
        cmd = args.script
    else:
        cmd = os.getenv("PYTHON_EXEC", sys.executable)
        cmd_args.append("-u")  # unbuffered stdout/stderr from the child
        if args.module:
            cmd_args.append("-m")
        cmd_args.append(args.script)
    cmd_args.extend(args.script_args)
    return cmd, cmd_args


def _get_model_cfgs(
        model_names,
        num_classes=None,
        expand_train_test=False,
        include_crop=True,
        expand_arch=False,
):
    """Resolve model names into sorted, de-duplicated (name, run-config) pairs.

    Config dicts carry 'img-size' (and 'crop-pct' when include_crop) taken from each
    model's pretrained cfg. With expand_train_test, the test input size/crop is added
    as an extra entry; with expand_arch, every pretrained cfg of the arch is used.
    """
    cfg_set = set()

    for model_name in model_names:
        if expand_arch:
            cfgs = get_arch_pretrained_cfgs(model_name).values()
        else:
            cfg = get_pretrained_cfg(model_name)
            cfgs = [] if cfg is None else [cfg]

        for pt_cfg in cfgs:
            # skip cfgs without a usable input size or with a non-matching head size
            if pt_cfg.input_size is None:
                continue
            if num_classes is not None and getattr(pt_cfg, 'num_classes', 0) != num_classes:
                continue

            # primary (train) configuration
            train_size = pt_cfg.input_size[-1]
            if include_crop:
                cfg_set.add((model_name, train_size, pt_cfg.crop_pct))
            else:
                cfg_set.add((model_name, train_size))

            # optional extra entry at the test/eval input size
            if expand_train_test and pt_cfg.test_input_size is not None:
                eval_size = pt_cfg.test_input_size[-1]
                if include_crop:
                    eval_crop = pt_cfg.test_crop_pct or pt_cfg.crop_pct
                    cfg_set.add((model_name, eval_size, eval_crop))
                else:
                    cfg_set.add((model_name, eval_size))

    if include_crop:
        return [(n, {'img-size': r, 'crop-pct': cp}) for n, r, cp in sorted(cfg_set)]
    return [(n, {'img-size': r}) for n, r in sorted(cfg_set)]


def main():
    """Run the wrapped script once per selected model and aggregate results to CSV.

    Builds a model/config list from ``--model-list`` (special values 'all',
    'all_in1k', 'all_res', a wildcard filter, or a file of names), invokes the
    target script as a subprocess per model, parses the JSON it prints after a
    '--result' delimiter, then sorts and writes the merged results.
    """
    args = parser.parse_args()
    cmd, cmd_args = cmd_from_args(args)

    model_cfgs = []
    if args.model_list == 'all':
        model_names = list_models(
            pretrained=args.pretrained,  # only include models w/ pretrained checkpoints if set
        )
        model_cfgs = [(n, None) for n in model_names]
    elif args.model_list == 'all_in1k':
        # All pretrained models filtered to 1000-class heads, with train + test resolutions.
        model_names = list_models(pretrained=True)
        model_cfgs = _get_model_cfgs(model_names, num_classes=1000, expand_train_test=True)
    elif args.model_list == 'all_res':
        # All architectures expanded across every pretrained cfg resolution, no crop-pct.
        model_names = list_models()
        model_cfgs = _get_model_cfgs(model_names, expand_train_test=True, include_crop=False, expand_arch=True)
    elif not is_model(args.model_list):
        # model name doesn't exist, try as wildcard filter
        model_names = list_models(args.model_list)
        model_cfgs = [(n, None) for n in model_names]

    # Fall back to treating --model-list as a file path with one model name per line.
    if not model_cfgs and os.path.exists(args.model_list):
        with open(args.model_list) as f:
            model_names = [line.rstrip() for line in f]
            model_cfgs = _get_model_cfgs(
                model_names,
                #num_classes=1000,
                expand_train_test=True,
                #include_crop=False,
            )

    if len(model_cfgs):
        results_file = args.results_file or './results.csv'
        results = []
        errors = []
        model_strings = '\n'.join([f'{x[0]}, {x[1]}' for x in model_cfgs])
        print(f"Running script on these models:\n {model_strings}")
        # Pick a default sort key based on which script is being wrapped.
        if not args.sort_key:
            if 'benchmark' in args.script:
                if any(['train' in a for a in args.script_args]):
                    sort_key = 'train_samples_per_sec'
                else:
                    sort_key = 'infer_samples_per_sec'
            else:
                sort_key = 'top1'
        else:
            sort_key = args.sort_key
        print(f'Script: {args.script}, Args: {args.script_args}, Sort key: {sort_key}')

        try:
            for m, ax in model_cfgs:
                if not m:
                    continue
                # Base command plus per-model extra args (e.g. --img-size / --crop-pct).
                args_str = (cmd, *[str(e) for e in cmd_args], '--model', m)
                if ax is not None:
                    extra_args = [(f'--{k}', str(v)) for k, v in ax.items()]
                    extra_args = [i for t in extra_args for i in t]
                    args_str += tuple(extra_args)
                try:
                    # The wrapped script prints a JSON result blob after a '--result' marker.
                    o = subprocess.check_output(args=args_str).decode('utf-8').split('--result')[-1]
                    r = json.loads(o)
                    results.append(r)
                except Exception as e:
                    # FIXME batch_size retry loop is currently done in either validation.py or benchmark.py
                    # for further robustness (but more overhead), we may want to manage that by looping here...
                    errors.append(dict(model=m, error=str(e)))
                if args.delay:
                    time.sleep(args.delay)
        except KeyboardInterrupt as e:
            pass  # Ctrl-C stops the sweep but still reports / saves what completed so far.

        # Surface per-model failures: subprocess errors plus results flagged with an 'error' key.
        errors.extend(list(filter(lambda x: 'error' in x, results)))
        if errors:
            print(f'{len(errors)} models had errors during run.')
            for e in errors:
                if 'model' in e:
                    print(f"\t {e['model']} ({e.get('error', 'Unknown')})")
                else:
                    print(e)

        results = list(filter(lambda x: 'error' not in x, results))

        # Only sort when every result actually carries the sort key.
        no_sortkey = list(filter(lambda x: sort_key not in x, results))
        if no_sortkey:
            print(f'{len(no_sortkey)} results missing sort key, skipping sort.')
        else:
            results = sorted(results, key=lambda x: x[sort_key], reverse=True)

        if len(results):
            print(f'{len(results)} models run successfully. Saving results to {results_file}.')
            write_results(results_file, results)


def write_results(results_file, results):
    """Write a list of result dicts to ``results_file`` as CSV.

    Args:
        results_file: Destination path; overwritten if it already exists.
        results: List of dicts. The first dict's keys define the header row;
            every row is written with those field names. An empty list is a
            no-op (previously this raised IndexError on ``results[0]``).
    """
    if not results:
        return
    # newline='' is required by the csv module to avoid spurious blank rows on Windows.
    with open(results_file, mode='w', newline='') as cf:
        dw = csv.DictWriter(cf, fieldnames=results[0].keys())
        dw.writeheader()
        dw.writerows(results)


if __name__ == '__main__':
    main()  # CLI entry point when executed as a script


================================================
FILE: clean_checkpoint.py
================================================
#!/usr/bin/env python3
""" Checkpoint Cleaning Script

Takes training checkpoints with GPU tensors, optimizer state, extra dict keys, etc.
and outputs a CPU  tensor checkpoint with only the `state_dict` along with SHA256
calculation for model zoo compatibility.

Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
"""
import torch
import argparse
import os
import hashlib
import shutil
import tempfile
from timm.models import load_state_dict
try:
    import safetensors.torch
    _has_safetensors = True
except ImportError:
    _has_safetensors = False

# CLI for the checkpoint cleaner: strip a training checkpoint to a bare state_dict.
parser = argparse.ArgumentParser(description='PyTorch Checkpoint Cleaner')
parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('--output', default='', type=str, metavar='PATH',
                    help='output path')
# NOTE(review): this help text reads like the inverse of the flag — passing
# --no-use-ema actually *disables* EMA weights (see main()); confirm wording.
parser.add_argument('--no-use-ema', dest='no_use_ema', action='store_true',
                    help='use ema version of weights if present')
parser.add_argument('--no-hash', dest='no_hash', action='store_true',
                    help='no hash in output filename')
parser.add_argument('--clean-aux-bn', dest='clean_aux_bn', action='store_true',
                    help='remove auxiliary batch norm layers (from SplitBN training) from checkpoint')
parser.add_argument('--safetensors', action='store_true',
                    help='Save weights using safetensors instead of the default torch way (pickle).')


def main():
    """CLI entry point: parse args and run the checkpoint cleaner."""
    args = parser.parse_args()

    # Refuse to clobber an existing output file.
    if os.path.exists(args.output):
        print("Error: Output filename ({}) already exists.".format(args.output))
        exit(1)

    clean_checkpoint(
        args.checkpoint,
        args.output,
        use_ema=not args.no_use_ema,
        no_hash=args.no_hash,
        clean_aux_bn=args.clean_aux_bn,
        safe_serialization=args.safetensors,
    )


def clean_checkpoint(
        checkpoint,
        output,
        use_ema=True,
        no_hash=False,
        clean_aux_bn=False,
        safe_serialization: bool = False,
):
    """Strip a training checkpoint down to a bare state_dict and re-save it.

    Loads ``checkpoint`` (optionally its EMA weights), removes the DataParallel
    'module.' prefix and (optionally) aux BN keys, then saves the cleaned dict
    with a SHA256-derived suffix for model-zoo style naming.

    Args:
        checkpoint: Path to the input checkpoint file.
        output: Optional output path; its dir/basename/extension are reused.
            When empty, the input checkpoint's basename is used in the CWD.
        use_ema: Prefer EMA weights when present in the checkpoint.
        no_hash: Omit the '-<sha8>' suffix from the output filename.
        clean_aux_bn: Drop 'aux_bn' keys (from SplitBN training).
        safe_serialization: Save via safetensors instead of torch.save (pickle).

    Returns:
        The final output filename, or '' if the input checkpoint doesn't exist.
    """
    if not (checkpoint and os.path.isfile(checkpoint)):
        print("Error: Checkpoint ({}) doesn't exist".format(checkpoint))
        return ''

    print("=> Loading checkpoint '{}'".format(checkpoint))
    state_dict = load_state_dict(checkpoint, use_ema=use_ema)
    new_state_dict = {}
    for k, v in state_dict.items():
        if clean_aux_bn and 'aux_bn' in k:
            # If all aux_bn keys are removed, the SplitBN layers will end up as normal and
            # load with the unmodified model using BatchNorm2d.
            continue
        # Strip DataParallel/DDP 'module.' prefix so weights load into a plain model.
        name = k[7:] if k.startswith('module.') else k
        new_state_dict[name] = v
    print("=> Loaded state_dict from '{}'".format(checkpoint))

    # Derive output dir / base name / extension from the requested output path,
    # falling back to the input checkpoint's basename when no output was given.
    ext = ''
    if output:
        checkpoint_root, checkpoint_base = os.path.split(output)
        checkpoint_base, ext = os.path.splitext(checkpoint_base)
    else:
        checkpoint_root = ''
        checkpoint_base = os.path.splitext(os.path.split(checkpoint)[1])[0]

    # Write to a temp name in the destination directory (not the CWD) so the final
    # move below stays on one filesystem and doesn't pollute the working directory.
    temp_filename = os.path.join(checkpoint_root, '__' + checkpoint_base)
    if safe_serialization:
        assert _has_safetensors, "`pip install safetensors` to use .safetensors"
        safetensors.torch.save_file(new_state_dict, temp_filename)
    else:
        torch.save(new_state_dict, temp_filename)

    # Hash the serialized bytes for the model-zoo style '<name>-<sha8>' filename.
    with open(temp_filename, 'rb') as f:
        sha_hash = hashlib.sha256(f.read()).hexdigest()

    if ext:
        final_ext = ext
    else:
        final_ext = ('.safetensors' if safe_serialization else '.pth')

    if no_hash:
        final_filename = checkpoint_base + final_ext
    else:
        final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + final_ext

    shutil.move(temp_filename, os.path.join(checkpoint_root, final_filename))
    # Fixed misplaced quote in the original message ("'{}, SHA256: {}'").
    print("=> Saved state_dict to '{}', SHA256: {}".format(final_filename, sha_hash))
    return final_filename


if __name__ == '__main__':
    main()  # CLI entry point when executed as a script


================================================
FILE: convert/convert_from_mxnet.py
================================================
import argparse
import hashlib
import os

import mxnet as mx
import gluoncv
import torch
from timm import create_model

# CLI: pick a single gluon model-zoo model to convert, or 'all' for every model in ALL.
parser = argparse.ArgumentParser(description='Convert from MXNet')
parser.add_argument('--model', default='all', type=str, metavar='MODEL',
                    help='Name of model to train (default: "all")')  # fixed missing ')' in help text


def convert(mxnet_name, torch_name):
    """Port pretrained gluoncv weights for ``mxnet_name`` into the timm model ``torch_name``.

    Saves the converted state_dict as './<torch_name>-<sha8>.pth' in the CWD.
    Relies on parameter/buffer iteration order matching between the two models;
    the inline asserts trip if names or shapes disagree.
    """
    # download and load the pre-trained model
    net = gluoncv.model_zoo.get_model(mxnet_name, pretrained=True)

    # create corresponding torch model
    torch_net = create_model(torch_name)

    # Pair up learnable params only; running stats are handled as buffers below.
    mxp = [(k, v) for k, v in net.collect_params().items() if 'running' not in k]
    torchp = list(torch_net.named_parameters())
    torch_params = {}

    # convert parameters
    # NOTE: we are relying on the fact that the order of parameters
    # are usually exactly the same between these models, thus no key name mapping
    # is necessary. Asserts will trip if this is not the case.
    for (tn, tv), (mn, mv) in zip(torchp, mxp):
        m_split = mn.split('_')
        t_split = tn.split('.')
        print(t_split, m_split)
        print(tv.shape, mv.shape)

        # ensure ordering of BN params match since their sizes are not specific
        if m_split[-1] == 'gamma':
            assert t_split[-1] == 'weight'
        if m_split[-1] == 'beta':
            assert t_split[-1] == 'bias'

        # ensure shapes match
        assert all(t == m for t, m in zip(tv.shape, mv.shape))

        torch_tensor = torch.from_numpy(mv.data().asnumpy())
        torch_params[tn] = torch_tensor

    # convert buffers (batch norm running stats)
    mxb = [(k, v) for k, v in net.collect_params().items() if any(x in k for x in ['running_mean', 'running_var'])]
    torchb = [(k, v) for k, v in torch_net.named_buffers() if 'num_batches' not in k]
    for (tn, tv), (mn, mv) in zip(torchb, mxb):
        print(tn, mn)
        print(tv.shape, mv.shape)

        # ensure ordering of BN params match since their sizes are not specific
        if 'running_var' in tn:
            assert 'running_var' in mn
        if 'running_mean' in tn:
            assert 'running_mean' in mn

        torch_tensor = torch.from_numpy(mv.data().asnumpy())
        torch_params[tn] = torch_tensor

    # Sanity-check by loading the converted dict, then save with a hash-suffixed name.
    torch_net.load_state_dict(torch_params)
    torch_filename = './%s.pth' % torch_name
    torch.save(torch_net.state_dict(), torch_filename)
    with open(torch_filename, 'rb') as f:
        sha_hash = hashlib.sha256(f.read()).hexdigest()
    final_filename = os.path.splitext(torch_filename)[0] + '-' + sha_hash[:8] + '.pth'
    os.rename(torch_filename, final_filename)
    print("=> Saved converted model to '{}, SHA256: {}'".format(final_filename, sha_hash))


def map_mx_to_torch_model(mx_name):
    """Translate a gluoncv model-zoo name into the matching timm 'gluon_*' model name."""
    name = mx_name.lower()
    # Apply the first matching rename rule (mirrors gluon vs timm naming differences).
    rules = (
        ('se_', 'se'),
        ('senet_', 'senet'),
        ('inceptionv3', 'inception_v3'),
    )
    for prefix, replacement in rules:
        if name.startswith(prefix):
            name = name.replace(prefix, replacement)
            break
    return 'gluon_' + name


# Gluon model-zoo names converted when --model=all (the default) is requested.
# The v1e ResNet variants are left disabled below — presumably no matching
# timm model; TODO confirm before re-enabling.
ALL = ['resnet18_v1b', 'resnet34_v1b', 'resnet50_v1b', 'resnet101_v1b', 'resnet152_v1b',
       'resnet50_v1c', 'resnet101_v1c', 'resnet152_v1c', 'resnet50_v1d', 'resnet101_v1d', 'resnet152_v1d',
       #'resnet50_v1e', 'resnet101_v1e', 'resnet152_v1e',
       'resnet50_v1s', 'resnet101_v1s', 'resnet152_v1s', 'resnext50_32x4d', 'resnext101_32x4d', 'resnext101_64x4d',
       'se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnext101_64x4d', 'senet_154', 'inceptionv3']


def main():
    """Convert the requested model, or every model in ALL when --model is empty/'all'."""
    args = parser.parse_args()

    if args.model and args.model != 'all':
        # Single-model conversion.
        convert(args.model, map_mx_to_torch_model(args.model))
    else:
        # Bulk conversion across the known model list.
        for mx_model in ALL:
            convert(mx_model, map_mx_to_torch_model(mx_model))


if __name__ == '__main__':
    main()  # CLI entry point when executed as a script


================================================
FILE: convert/convert_nest_flax.py
================================================
"""
Convert weights from https://github.com/google-research/nested-transformer
NOTE: You'll need https://github.com/google/CommonLoopUtils, not included in requirements.txt
"""

import sys

import numpy as np
import torch

from clu import checkpoint


# Number of transformer blocks per hierarchy level for each supported NesT variant.
arch_depths = {
    'nest_base': [2, 2, 20],
    'nest_small': [2, 2, 20],
    'nest_tiny': [2, 2, 8],
}


def convert_nest(checkpoint_path, arch):
    """
    Expects path to checkpoint which is a dir containing 4 files like in each of these folders
        - https://console.cloud.google.com/storage/browser/gresearch/nest-checkpoints
    `arch` is needed to select the per-level block depths (see `arch_depths`).
    Returns a state dict that can be used with `torch.nn.Module.load_state_dict`
    Hint: Follow timm.models.nest.Nest.__init__ and
    https://github.com/google-research/nested-transformer/blob/main/models/nest_net.py
    """
    assert arch in ['nest_base', 'nest_small', 'nest_tiny'], "Your `arch` is not supported"

    flax_dict = checkpoint.load_state_dict(checkpoint_path)['optimizer']['target']
    state_dict = {}

    # Patch embedding: flax conv kernels are HWIO, torch wants OIHW, hence permute(3, 2, 0, 1).
    state_dict['patch_embed.proj.weight'] = torch.tensor(
        flax_dict['PatchEmbedding_0']['Conv_0']['kernel']).permute(3, 2, 0, 1)
    state_dict['patch_embed.proj.bias'] = torch.tensor(flax_dict['PatchEmbedding_0']['Conv_0']['bias'])

    # Positional embeddings, one per hierarchy level.
    posemb_keys = [k for k in flax_dict.keys() if k.startswith('PositionEmbedding')]
    for i, k in enumerate(posemb_keys):
        state_dict[f'levels.{i}.pos_embed'] = torch.tensor(flax_dict[k]['pos_embedding'])

    # Transformer encoders: flax blocks are numbered globally, torch per level/layer.
    depths = arch_depths[arch]
    for level in range(len(depths)):
        for layer in range(depths[level]):
            global_layer_ix = sum(depths[:level]) + layer
            # Norms ('scale' in flax LayerNorm maps to torch 'weight').
            for i in range(2):
                state_dict[f'levels.{level}.transformer_encoder.{layer}.norm{i+1}.weight'] = torch.tensor(
                    flax_dict[f'EncoderNDBlock_{global_layer_ix}'][f'LayerNorm_{i}']['scale'])
                state_dict[f'levels.{level}.transformer_encoder.{layer}.norm{i+1}.bias'] = torch.tensor(
                    flax_dict[f'EncoderNDBlock_{global_layer_ix}'][f'LayerNorm_{i}']['bias'])
            # Attention qkv: flax stores separate q and fused kv projections; fuse into one qkv.
            w_q = flax_dict[f'EncoderNDBlock_{global_layer_ix}']['MultiHeadAttention_0']['DenseGeneral_0']['kernel']
            w_kv = flax_dict[f'EncoderNDBlock_{global_layer_ix}']['MultiHeadAttention_0']['DenseGeneral_1']['kernel']
            # Pay attention to dims here (maybe get pen and paper)
            w_kv = np.concatenate(np.split(w_kv, 2, -1), 1)
            w_qkv = np.concatenate([w_q, w_kv], 1)
            state_dict[f'levels.{level}.transformer_encoder.{layer}.attn.qkv.weight'] = torch.tensor(w_qkv).flatten(1).permute(1,0)
            b_q = flax_dict[f'EncoderNDBlock_{global_layer_ix}']['MultiHeadAttention_0']['DenseGeneral_0']['bias']
            b_kv = flax_dict[f'EncoderNDBlock_{global_layer_ix}']['MultiHeadAttention_0']['DenseGeneral_1']['bias']
            # Pay attention to dims here (maybe get pen and paper)
            b_kv = np.concatenate(np.split(b_kv, 2, -1), 0)
            b_qkv = np.concatenate([b_q, b_kv], 0)
            state_dict[f'levels.{level}.transformer_encoder.{layer}.attn.qkv.bias'] = torch.tensor(b_qkv).reshape(-1)
            # Attention proj
            w_proj = flax_dict[f'EncoderNDBlock_{global_layer_ix}']['MultiHeadAttention_0']['proj_kernel']
            w_proj = torch.tensor(w_proj).permute(2, 1, 0).flatten(1)
            state_dict[f'levels.{level}.transformer_encoder.{layer}.attn.proj.weight'] = w_proj
            state_dict[f'levels.{level}.transformer_encoder.{layer}.attn.proj.bias'] = torch.tensor(
                flax_dict[f'EncoderNDBlock_{global_layer_ix}']['MultiHeadAttention_0']['bias'])
            # MLP: flax Dense kernels are (in, out); torch Linear weights are (out, in), hence permute.
            for i in range(2):
                state_dict[f'levels.{level}.transformer_encoder.{layer}.mlp.fc{i+1}.weight'] = torch.tensor(
                    flax_dict[f'EncoderNDBlock_{global_layer_ix}']['MlpBlock_0'][f'Dense_{i}']['kernel']).permute(1, 0)
                state_dict[f'levels.{level}.transformer_encoder.{layer}.mlp.fc{i+1}.bias'] = torch.tensor(
                    flax_dict[f'EncoderNDBlock_{global_layer_ix}']['MlpBlock_0'][f'Dense_{i}']['bias'])

    # Block aggregations (ConvPool); levels start at 1 — level 0 has no pooling in front.
    for level in range(1, len(depths)):
        # Convs
        state_dict[f'levels.{level}.pool.conv.weight'] = torch.tensor(
            flax_dict[f'ConvPool_{level-1}']['Conv_0']['kernel']).permute(3, 2, 0, 1)
        state_dict[f'levels.{level}.pool.conv.bias'] = torch.tensor(
            flax_dict[f'ConvPool_{level-1}']['Conv_0']['bias'])
        # Norms
        state_dict[f'levels.{level}.pool.norm.weight'] = torch.tensor(
                    flax_dict[f'ConvPool_{level-1}']['LayerNorm_0']['scale'])
        state_dict[f'levels.{level}.pool.norm.bias'] = torch.tensor(
                    flax_dict[f'ConvPool_{level-1}']['LayerNorm_0']['bias'])

    # Final norm
    state_dict[f'norm.weight'] = torch.tensor(flax_dict['LayerNorm_0']['scale'])
    state_dict[f'norm.bias'] = torch.tensor(flax_dict['LayerNorm_0']['bias'])

    # Classifier
    state_dict['head.weight'] = torch.tensor(flax_dict['Dense_0']['kernel']).permute(1, 0)
    state_dict['head.bias'] = torch.tensor(flax_dict['Dense_0']['bias'])

    return state_dict


if __name__ == '__main__':
    # Usage: python convert_nest_flax.py <variant>; reads ./nest-<v>_imagenet,
    # writes ./jx_nest_<variant>.pth in the CWD.
    variant = sys.argv[1] # base, small, or tiny
    state_dict = convert_nest(f'./nest-{variant[0]}_imagenet', f'nest_{variant}')
    torch.save(state_dict, f'./jx_nest_{variant}.pth')

================================================
FILE: distributed_train.sh
================================================
#!/bin/bash
# Launch train.py under torchrun with NUM_PROC processes on this node.
# Usage: ./distributed_train.sh <num_processes> [train.py args...]
NUM_PROC=$1
shift
# Quote the expansion so an empty/omitted NUM_PROC reaches torchrun as an
# explicit empty value (clear error) instead of being word-split away.
torchrun --nproc_per_node="$NUM_PROC" train.py "$@"



================================================
FILE: hfdocs/README.md
================================================
# Hugging Face Timm Docs

## Getting Started

```
pip install git+https://github.com/huggingface/doc-builder.git@main#egg=hf-doc-builder
pip install watchdog black
```

## Preview the Docs Locally

```
doc-builder preview timm hfdocs/source
```


================================================
FILE: hfdocs/source/_toctree.yml
================================================
- sections: 
  - local: index
    title: Home
  - local: quickstart
    title: Quickstart
  - local: installation
    title: Installation
  - local: changes
    title: Changelog
  title: Get started
- sections:
  - local: feature_extraction
    title: Using Pretrained Models as Feature Extractors
  - local: hparams
    title: Hyper-Parameters (HParams)
  - local: training_script
    title: Using The Official Training Script
  - local: hf_hub
    title: Share and Load Models from the 🤗 Hugging Face Hub
  title: Tutorials
- sections:
  - local: models
    title: Model Summaries
  - local: results
    title: Results
  - local: models/adversarial-inception-v3
    title: Adversarial Inception v3
  - local: models/advprop
    title: AdvProp (EfficientNet)
  - local: models/big-transfer
    title: Big Transfer (BiT)
  - local: models/csp-darknet
    title: CSP-DarkNet
  - local: models/csp-resnet
    title: CSP-ResNet
  - local: models/csp-resnext
    title: CSP-ResNeXt
  - local: models/densenet
    title: DenseNet
  - local: models/dla
    title: Deep Layer Aggregation
  - local: models/dpn
    title: Dual Path Network (DPN)
  - local: models/ecaresnet
    title: ECA-ResNet
  - local: models/efficientnet
    title: EfficientNet
  - local: models/efficientnet-pruned
    title: EfficientNet (Knapsack Pruned)
  - local: models/ensemble-adversarial
    title: Ensemble Adversarial Inception ResNet v2
  - local: models/ese-vovnet
    title: ESE-VoVNet
  - local: models/fbnet
    title: FBNet
  - local: models/gloun-inception-v3
    title: (Gluon) Inception v3
  - local: models/gloun-resnet
    title: (Gluon) ResNet
  - local: models/gloun-resnext
    title: (Gluon) ResNeXt
  - local: models/gloun-senet
    title: (Gluon) SENet
  - local: models/gloun-seresnext
    title: (Gluon) SE-ResNeXt
  - local: models/gloun-xception
    title: (Gluon) Xception
  - local: models/hrnet
    title: HRNet
  - local: models/ig-resnext
    title: Instagram ResNeXt WSL
  - local: models/inception-resnet-v2
    title: Inception ResNet v2
  - local: models/inception-v3
    title: Inception v3
  - local: models/inception-v4
    title: Inception v4
  - local: models/legacy-se-resnet
    title: (Legacy) SE-ResNet
  - local: models/legacy-se-resnext
    title: (Legacy) SE-ResNeXt
  - local: models/legacy-senet
    title: (Legacy) SENet
  - local: models/mixnet
    title: MixNet
  - local: models/mnasnet
    title: MnasNet
  - local: models/mobilenet-v2
    title: MobileNet v2
  - local: models/mobilenet-v3
    title: MobileNet v3
  - local: models/nasnet
    title: NASNet
  - local: models/noisy-student
    title: Noisy Student (EfficientNet)
  - local: models/pnasnet
    title: PNASNet
  - local: models/regnetx
    title: RegNetX
  - local: models/regnety
    title: RegNetY
  - local: models/res2net
    title: Res2Net
  - local: models/res2next
    title: Res2NeXt
  - local: models/resnest
    title: ResNeSt
  - local: models/resnet
    title: ResNet
  - local: models/resnet-d
    title: ResNet-D
  - local: models/resnext
    title: ResNeXt
  - local: models/rexnet
    title: RexNet
  - local: models/se-resnet
    title: SE-ResNet
  - local: models/selecsls
    title: SelecSLS
  - local: models/seresnext
    title: SE-ResNeXt
  - local: models/skresnet
    title: SK-ResNet
  - local: models/skresnext
    title: SK-ResNeXt
  - local: models/spnasnet
    title: SPNASNet
  - local: models/ssl-resnet
    title: SSL ResNet
  - local: models/swsl-resnet
    title: SWSL ResNet
  - local: models/swsl-resnext
    title: SWSL ResNeXt
  - local: models/tf-efficientnet
    title: (Tensorflow) EfficientNet
  - local: models/tf-efficientnet-condconv
    title: (Tensorflow) EfficientNet CondConv
  - local: models/tf-efficientnet-lite
    title: (Tensorflow) EfficientNet Lite
  - local: models/tf-inception-v3
    title: (Tensorflow) Inception v3
  - local: models/tf-mixnet
    title: (Tensorflow) MixNet
  - local: models/tf-mobilenet-v3
    title: (Tensorflow) MobileNet v3
  - local: models/tresnet
    title: TResNet
  - local: models/wide-resnet
    title: Wide ResNet
  - local: models/xception
    title: Xception
  title: Model Pages
  isExpanded: false
- sections:
  - local: reference/models
    title: Models
  - local: reference/data
    title: Data
  - local: reference/optimizers
    title: Optimizers
  - local: reference/schedulers
    title: Learning Rate Schedulers
  title: Reference



================================================
FILE: hfdocs/source/changes.mdx
================================================
# Changelog

## Dec 12, 2025
* Add CSATV2 model (thanks https://github.com/gusdlf93) -- a lightweight but high res model with DCT stem & spatial attention. https://huggingface.co/Hyunil/CSATv2
* Add AdaMuon and NAdaMuon optimizer support to existing `timm` Muon impl. Appears more competitive vs AdamW with familiar hparams for image tasks.
* End of year PR cleanup, merge aspects of several long-open PRs
  * Merge differential attention (`DiffAttention`), add corresponding `DiffParallelScalingBlock` (for ViT), train some wee vits
    * https://huggingface.co/timm/vit_dwee_patch16_reg1_gap_256.sbb_in1k
    * https://huggingface.co/timm/vit_dpwee_patch16_reg1_gap_256.sbb_in1k
  * Add a few pooling modules, `LsePlus` and `SimPool`
  * Cleanup, optimize `DropBlock2d` (also add support to ByobNet based models)
* Bump unit tests to PyTorch 2.9.1 + Python 3.13 on upper end, lower still PyTorch 1.13 + Python 3.10
  
## Dec 1, 2025
* Add lightweight task abstraction, add logits and feature distillation support to train script via new tasks.
* Remove old APEX AMP support

## Nov 4, 2025
* Fix LayerScale / LayerScale2d init bug (init values ignored), introduced in 1.0.21. Thanks https://github.com/Ilya-Fradlin
* Release 1.0.22

## Oct 31, 2025 🎃
* Update imagenet & OOD variant result csv files to include a few new models and verify correctness over several torch & timm versions
* EfficientNet-X and EfficientNet-H B5 model weights added as part of a hparam search for AdamW vs Muon (still iterating on Muon runs)

## Oct 16-20, 2025
* Add an impl of the Muon optimizer (based on https://github.com/KellerJordan/Muon) with customizations
  * extra flexibility and improved handling for conv weights and fallbacks for weight shapes not suited for orthogonalization
  * small speedup for NS iterations by reducing allocs and using fused (b)add(b)mm ops
  * by default uses AdamW (or NAdamW if `nesterov=True`) updates if muon not suitable for parameter shape (or excluded via param group flag)
  * like torch impl, select from several LR scale adjustment fns via `adjust_lr_fn`
  * select from several NS coefficient presets or specify your own via `ns_coefficients`
* First 2 steps of 'meta' device model initialization supported
  * Fix several ops that were breaking creation under 'meta' device context
  * Add device & dtype factory kwarg support to all models and modules (anything inheriting from nn.Module) in `timm`
* License fields added to pretrained cfgs in code
* Release 1.0.21

## Sept 21, 2025
* Remap DINOv3 ViT weight tags from `lvd_1689m` -> `lvd1689m` to match (same for `sat_493m` -> `sat493m`)
* Release 1.0.20

## Sept 17, 2025
* DINOv3 (https://arxiv.org/abs/2508.10104) ConvNeXt and ViT models added. ConvNeXt models were mapped to existing `timm` model. ViT support done via the EVA base model w/ a new `RotaryEmbeddingDinoV3` to match the DINOv3 specific RoPE impl
  * HuggingFace Hub: https://huggingface.co/collections/timm/timm-dinov3-68cb08bb0bee365973d52a4d
* MobileCLIP-2 (https://arxiv.org/abs/2508.20691) vision encoders. New MCI3/MCI4 FastViT variants added and weights mapped to existing FastViT and B, L/14 ViTs.
* MetaCLIP-2 Worldwide (https://arxiv.org/abs/2507.22062) ViT encoder weights added.
* SigLIP-2 (https://arxiv.org/abs/2502.14786) NaFlex ViT encoder weights added via timm NaFlexViT model.
* Misc fixes and contributions

## July 23, 2025
* Add `set_input_size()` method to EVA models, used by OpenCLIP 3.0.0 to allow resizing for timm based encoder models.
* Release 1.0.18, needed for PE-Core S & T models in OpenCLIP 3.0.0
* Fix small typing issue that broke Python 3.9 compat. 1.0.19 patch release.

## July 21, 2025
* ROPE support added to NaFlexViT. All models covered by the EVA base (`eva.py`) including EVA, EVA02, Meta PE ViT, `timm` SBB ViT w/ ROPE, and Naver ROPE-ViT can be now loaded in NaFlexViT when `use_naflex=True` passed at model creation time
* More Meta PE ViT encoders added, including small/tiny variants, lang variants w/ tiling, and more spatial variants.
* PatchDropout fixed with NaFlexViT and also w/ EVA models (regression after adding Naver ROPE-ViT)
* Fix XY order with grid_indexing='xy', impacted non-square image use in 'xy' mode (only ROPE-ViT and PE impacted).

## July 7, 2025
* MobileNet-v5 backbone tweaks for improved Google Gemma 3n behaviour (to pair with updated official weights)
  * Add stem bias (zero'd in updated weights, compat break with old weights)
  * GELU -> GELU (tanh approx). A minor change to be closer to JAX
* Add two arguments to layer-decay support, a min scale clamp and 'no optimization' scale threshold
* Add 'Fp32' LayerNorm, RMSNorm, SimpleNorm variants that can be enabled to force computation of norm in float32
* Some typing, argument cleanup for norm, norm+act layers done with above
* Support Naver ROPE-ViT (https://github.com/naver-ai/rope-vit) in `eva.py`, add RotaryEmbeddingMixed module for mixed mode, weights on HuggingFace Hub

|model                                             |img_size|top1  |top5  |param_count|
|--------------------------------------------------|--------|------|------|-----------|
|vit_large_patch16_rope_mixed_ape_224.naver_in1k  |224     |84.84 |97.122|304.4      |
|vit_large_patch16_rope_mixed_224.naver_in1k      |224     |84.828|97.116|304.2      |
|vit_large_patch16_rope_ape_224.naver_in1k        |224     |84.65 |97.154|304.37     |
|vit_large_patch16_rope_224.naver_in1k            |224     |84.648|97.122|304.17     |
|vit_base_patch16_rope_mixed_ape_224.naver_in1k   |224     |83.894|96.754|86.59      |
|vit_base_patch16_rope_mixed_224.naver_in1k       |224     |83.804|96.712|86.44      |
|vit_base_patch16_rope_ape_224.naver_in1k         |224     |83.782|96.61 |86.59      |
|vit_base_patch16_rope_224.naver_in1k             |224     |83.718|96.672|86.43      |
|vit_small_patch16_rope_224.naver_in1k            |224     |81.23 |95.022|21.98      |
|vit_small_patch16_rope_mixed_224.naver_in1k      |224     |81.216|95.022|21.99      |
|vit_small_patch16_rope_ape_224.naver_in1k        |224     |81.004|95.016|22.06      |
|vit_small_patch16_rope_mixed_ape_224.naver_in1k  |224     |80.986|94.976|22.06      |
* Some cleanup of ROPE modules, helpers, and FX tracing leaf registration
* Preparing version 1.0.17 release

## June 26, 2025
* MobileNetV5 backbone (w/ encoder only variant) for [Gemma 3n](https://ai.google.dev/gemma/docs/gemma-3n#parameters) image encoder
* Version 1.0.16 released

## June 23, 2025
* Add F.grid_sample based 2D and factorized pos embed resize to NaFlexViT. Faster when lots of different sizes (based on example by https://github.com/stas-sl).
* Further speed up patch embed resample by replacing vmap with matmul (based on snippet by https://github.com/stas-sl).
* Add 3 initial native aspect NaFlexViT checkpoints created while testing, ImageNet-1k and 3 different pos embed configs w/ same hparams.

 | Model | Top-1 Acc | Top-5 Acc | Params (M) | Eval Seq Len |
 |:---|:---:|:---:|:---:|:---:|
 | [naflexvit_base_patch16_par_gap.e300_s576_in1k](https://hf.co/timm/naflexvit_base_patch16_par_gap.e300_s576_in1k) | 83.67 | 96.45 | 86.63 | 576 |
 | [naflexvit_base_patch16_parfac_gap.e300_s576_in1k](https://hf.co/timm/naflexvit_base_patch16_parfac_gap.e300_s576_in1k) | 83.63 | 96.41 | 86.46 | 576 |
 | [naflexvit_base_patch16_gap.e300_s576_in1k](https://hf.co/timm/naflexvit_base_patch16_gap.e300_s576_in1k) | 83.50 | 96.46 | 86.63 | 576 |
* Support gradient checkpointing for `forward_intermediates` and fix some checkpointing bugs. Thanks https://github.com/brianhou0208
* Add 'corrected weight decay' (https://arxiv.org/abs/2506.02285) as option to AdamW (legacy), Adopt, Kron, Adafactor (BV), Lamb, LaProp, Lion, NadamW, RmsPropTF, SGDW optimizers
* Switch PE (perception encoder) ViT models to use native timm weights instead of remapping on the fly
* Fix cuda stream bug in prefetch loader
  
## June 5, 2025
* Initial NaFlexVit model code. NaFlexVit is a Vision Transformer with:
  1. Encapsulated embedding and position encoding in a single module
  2. Support for nn.Linear patch embedding on pre-patchified (dictionary) inputs
  3. Support for NaFlex variable aspect, variable resolution (SigLip-2: https://arxiv.org/abs/2502.14786)
  4. Support for FlexiViT variable patch size (https://arxiv.org/abs/2212.08013)
  5. Support for NaViT fractional/factorized position embedding (https://arxiv.org/abs/2307.06304)
* Existing vit models in `vision_transformer.py` can be loaded into the NaFlexVit model by adding the `use_naflex=True` flag to `create_model`
  * Some native weights coming soon
* A full NaFlex data pipeline is available that allows training / fine-tuning / evaluating with variable aspect / size images
  * To enable in `train.py` and `validate.py` add the `--naflex-loader` arg, must be used with a NaFlexVit
* To evaluate an existing (classic) ViT loaded in NaFlexVit model w/ NaFlex data pipe:
  * `python validate.py /imagenet --amp -j 8 --model vit_base_patch16_224 --model-kwargs use_naflex=True --naflex-loader --naflex-max-seq-len 256` 
* The training has some extra args features worth noting
  * The `--naflex-train-seq-lens` argument specifies which sequence lengths to randomly pick from per batch during training
  * The `--naflex-max-seq-len` argument sets the target sequence length for validation
  * Adding `--model-kwargs enable_patch_interpolator=True --naflex-patch-sizes 12 16 24` will enable random patch size selection per-batch w/ interpolation
  * The `--naflex-loss-scale` arg changes loss scaling mode per batch relative to the batch size, `timm` NaFlex loading changes the batch size for each seq len

## May 28, 2025
* Add a number of small/fast models thanks to https://github.com/brianhou0208
  * SwiftFormer - [(ICCV2023) SwiftFormer: Efficient Additive Attention for Transformer-based Real-time Mobile Vision Applications](https://github.com/Amshaker/SwiftFormer) 
  * FasterNet - [(CVPR2023) Run, Don’t Walk: Chasing Higher FLOPS for Faster Neural Networks](https://github.com/JierunChen/FasterNet)
  * SHViT - [(CVPR2024) SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design](https://github.com/ysj9909/SHViT)
  * StarNet - [(CVPR2024) Rewrite the Stars](https://github.com/ma-xu/Rewrite-the-Stars)
  * GhostNet-V3 - [GhostNetV3: Exploring the Training Strategies for Compact Models](https://github.com/huawei-noah/Efficient-AI-Backbones/tree/master/ghostnetv3_pytorch)
* Update EVA ViT (closest match) to support Perception Encoder models (https://arxiv.org/abs/2504.13181) from Meta, loading Hub weights but I still need to push dedicated `timm` weights
  * Add some flexibility to ROPE impl
* Big increase in number of models supporting `forward_intermediates()` and some additional fixes thanks to https://github.com/brianhou0208
  * DaViT, EdgeNeXt, EfficientFormerV2, EfficientViT(MIT), EfficientViT(MSRA), FocalNet, GCViT, HGNet /V2, InceptionNeXt, Inception-V4, MambaOut, MetaFormer, NesT, Next-ViT, PiT, PVT V2, RepGhostNet, RepViT, ResNetV2, ReXNet, TinyViT, TResNet, VoV
* TNT model updated w/ new weights `forward_intermediates()` thanks to https://github.com/brianhou0208
* Add `local-dir:` pretrained schema, can use `local-dir:/path/to/model/folder` for model name to source model / pretrained cfg & weights Hugging Face Hub models (config.json + weights file) from a local folder.
* Fixes, improvements for onnx export
    
## Feb 21, 2025
* SigLIP 2 ViT image encoders added (https://huggingface.co/collections/timm/siglip-2-67b8e72ba08b09dd97aecaf9)
  * Variable resolution / aspect NaFlex versions are a WIP
* Add 'SO150M2' ViT weights trained with SBB recipes, great results, better for ImageNet than previous attempt w/ less training.
  * `vit_so150m2_patch16_reg1_gap_448.sbb_e200_in12k_ft_in1k` - 88.1% top-1
  * `vit_so150m2_patch16_reg1_gap_384.sbb_e200_in12k_ft_in1k` - 87.9% top-1
  * `vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1k` - 87.3% top-1
  * `vit_so150m2_patch16_reg4_gap_256.sbb_e200_in12k`
* Updated InternViT-300M '2.5' weights
* Release 1.0.15

## Feb 1, 2025
* FYI PyTorch 2.6 & Python 3.13 are tested and working w/ current main and released version of `timm`

## Jan 27, 2025
* Add Kron Optimizer (PSGD w/ Kronecker-factored preconditioner) 
  * Code from https://github.com/evanatyourservice/kron_torch
  * See also https://sites.google.com/site/lixilinx/home/psgd

## Jan 19, 2025
* Fix loading of LeViT safetensor weights, remove conversion code which should have been deactivated
* Add 'SO150M' ViT weights trained with SBB recipes, decent results, but not optimal shape for ImageNet-12k/1k pretrain/ft
  * `vit_so150m_patch16_reg4_gap_256.sbb_e250_in12k_ft_in1k` - 86.7% top-1
  * `vit_so150m_patch16_reg4_gap_384.sbb_e250_in12k_ft_in1k` - 87.4% top-1
  * `vit_so150m_patch16_reg4_gap_256.sbb_e250_in12k`
* Misc typing, typo, etc. cleanup
* 1.0.14 release to get above LeViT fix out

## Jan 9, 2025
* Add support to train and validate in pure `bfloat16` or `float16`
* `wandb` project name arg added by https://github.com/caojiaolong, use arg.experiment for name
* Fix old issue w/ checkpoint saving not working on filesystem w/o hard-link support (e.g. FUSE fs mounts)
* 1.0.13 release

## Jan 6, 2025
* Add `torch.utils.checkpoint.checkpoint()` wrapper in `timm.models` that defaults `use_reentrant=False`, unless `TIMM_REENTRANT_CKPT=1` is set in env.

## Dec 31, 2024
* `convnext_nano` 384x384 ImageNet-12k pretrain & fine-tune. https://huggingface.co/models?search=convnext_nano%20r384
* Add AIM-v2 encoders from https://github.com/apple/ml-aim, see on Hub: https://huggingface.co/models?search=timm%20aimv2
* Add PaliGemma2 encoders from https://github.com/google-research/big_vision to existing PaliGemma, see on Hub: https://huggingface.co/models?search=timm%20pali2
* Add missing L/14 DFN2B 39B CLIP ViT, `vit_large_patch14_clip_224.dfn2b_s39b`
* Fix existing `RmsNorm` layer & fn to match standard formulation, use PT 2.5 impl when possible. Move old impl to `SimpleNorm` layer, it's LN w/o centering or bias. There were only two `timm` models using it, and they have been updated.
* Allow override of `cache_dir` arg for model creation
* Pass through `trust_remote_code` for HF datasets wrapper
* `inception_next_atto` model added by creator
* Adan optimizer caution, and Lamb decoupled weight decay options
* Some feature_info metadata fixed by https://github.com/brianhou0208
* All OpenCLIP and JAX (CLIP, SigLIP, Pali, etc) model weights that used load time remapping were given their own HF Hub instances so that they work with `hf-hub:` based loading, and thus will work with new Transformers `TimmWrapperModel`

## Nov 28, 2024
* More optimizers
  * Add MARS optimizer (https://arxiv.org/abs/2411.10438, https://github.com/AGI-Arena/MARS)
  * Add LaProp optimizer (https://arxiv.org/abs/2002.04839, https://github.com/Z-T-WANG/LaProp-Optimizer)
  * Add masking from 'Cautious Optimizers' (https://arxiv.org/abs/2411.16085, https://github.com/kyleliang919/C-Optim) to Adafactor, Adafactor Big Vision, AdamW (legacy), Adopt, Lamb, LaProp, Lion, NadamW, RMSPropTF, SGDW
  * Cleanup some docstrings and type annotations re optimizers and factory
* Add MobileNet-V4 Conv Medium models pretrained on in12k and fine-tuned in1k @ 384x384
  * https://huggingface.co/timm/mobilenetv4_conv_medium.e250_r384_in12k_ft_in1k
  * https://huggingface.co/timm/mobilenetv4_conv_medium.e250_r384_in12k
  * https://huggingface.co/timm/mobilenetv4_conv_medium.e180_ad_r384_in12k
  * https://huggingface.co/timm/mobilenetv4_conv_medium.e180_r384_in12k
* Add small cs3darknet, quite good for the speed
  * https://huggingface.co/timm/cs3darknet_focus_s.ra4_e3600_r256_in1k

## Nov 12, 2024
* Optimizer factory refactor
  * New factory works by registering optimizers using an OptimInfo dataclass w/ some key traits
  * Add `list_optimizers`, `get_optimizer_class`, `get_optimizer_info` to reworked `create_optimizer_v2` fn to explore optimizers, get info or class
  * deprecate `optim.optim_factory`, move fns to `optim/_optim_factory.py` and `optim/_param_groups.py` and encourage import via `timm.optim`
* Add Adopt (https://github.com/iShohei220/adopt) optimizer
* Add 'Big Vision' variant of Adafactor (https://github.com/google-research/big_vision/blob/main/big_vision/optax.py) optimizer
* Fix original Adafactor to pick better factorization dims for convolutions
* Tweak LAMB optimizer with some improvements in torch.where functionality since original, refactor clipping a bit
* dynamic img size support in vit, deit, eva improved to support resize from non-square patch grids, thanks https://github.com/wojtke

## Oct 31, 2024
Add a set of new very well trained ResNet & ResNet-V2 18/34 (basic block) weights. See https://huggingface.co/blog/rwightman/resnet-trick-or-treat

## Oct 19, 2024
* Cleanup torch amp usage to avoid cuda specific calls, merge support for Ascend (NPU) devices from [MengqingCao](https://github.com/MengqingCao) that should work now in PyTorch 2.5 w/ new device extension autoloading feature. Tested Intel Arc (XPU) in Pytorch 2.5 too and it (mostly) worked.

## Oct 16, 2024
* Fix error on importing from deprecated path `timm.models.registry`, increased priority of existing deprecation warnings to be visible
* Port weights of InternViT-300M (https://huggingface.co/OpenGVLab/InternViT-300M-448px) to `timm` as `vit_intern300m_patch14_448`

### Oct 14, 2024
* Pre-activation (ResNetV2) version of 18/18d/34/34d ResNet model defs added by request (weights pending)
* Release 1.0.10

### Oct 11, 2024
* MambaOut (https://github.com/yuweihao/MambaOut) model & weights added. A cheeky take on SSM vision models w/o the SSM (essentially ConvNeXt w/ gating). A mix of original weights + custom variations & weights.

|model                                                                                                                |img_size|top1  |top5  |param_count|
|---------------------------------------------------------------------------------------------------------------------|--------|------|------|-----------|
|[mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_r384_in12k_ft_in1k)|384     |87.506|98.428|101.66     |
|[mambaout_base_plus_rw.sw_e150_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_in12k_ft_in1k)|288     |86.912|98.236|101.66     |
|[mambaout_base_plus_rw.sw_e150_in12k_ft_in1k](http://huggingface.co/timm/mambaout_base_plus_rw.sw_e150_in12k_ft_in1k)|224     |86.632|98.156|101.66     |
|[mambaout_base_tall_rw.sw_e500_in1k](http://huggingface.co/timm/mambaout_base_tall_rw.sw_e500_in1k)                  |288     |84.974|97.332|86.48      |
|[mambaout_base_wide_rw.sw_e500_in1k](http://huggingface.co/timm/mambaout_base_wide_rw.sw_e500_in1k)                  |288     |84.962|97.208|94.45      |
|[mambaout_base_short_rw.sw_e500_in1k](http://huggingface.co/timm/mambaout_base_short_rw.sw_e500_in1k)                |288     |84.832|97.27 |88.83      |
|[mambaout_base.in1k](http://huggingface.co/timm/mambaout_base.in1k)                                                  |288     |84.72 |96.93 |84.81      |
|[mambaout_small_rw.sw_e450_in1k](http://huggingface.co/timm/mambaout_small_rw.sw_e450_in1k)                          |288     |84.598|97.098|48.5       |
|[mambaout_small.in1k](http://huggingface.co/timm/mambaout_small.in1k)                                                |288     |84.5  |96.974|48.49      |
|[mambaout_base_wide_rw.sw_e500_in1k](http://huggingface.co/timm/mambaout_base_wide_rw.sw_e500_in1k)                  |224     |84.454|96.864|94.45      |
|[mambaout_base_tall_rw.sw_e500_in1k](http://huggingface.co/timm/mambaout_base_tall_rw.sw_e500_in1k)                  |224     |84.434|96.958|86.48      |
|[mambaout_base_short_rw.sw_e500_in1k](http://huggingface.co/timm/mambaout_base_short_rw.sw_e500_in1k)                |224     |84.362|96.952|88.83      |
|[mambaout_base.in1k](http://huggingface.co/timm/mambaout_base.in1k)                                                  |224     |84.168|96.68 |84.81      |
|[mambaout_small.in1k](http://huggingface.co/timm/mambaout_small.in1k)                                                |224     |84.086|96.63 |48.49      |
|[mambaout_small_rw.sw_e450_in1k](http://huggingface.co/timm/mambaout_small_rw.sw_e450_in1k)                          |224     |84.024|96.752|48.5       |
|[mambaout_tiny.in1k](http://huggingface.co/timm/mambaout_tiny.in1k)                                                  |288     |83.448|96.538|26.55      |
|[mambaout_tiny.in1k](http://huggingface.co/timm/mambaout_tiny.in1k)                                                  |224     |82.736|96.1  |26.55      |
|[mambaout_kobe.in1k](http://huggingface.co/timm/mambaout_kobe.in1k)                                                  |288     |81.054|95.718|9.14       |
|[mambaout_kobe.in1k](http://huggingface.co/timm/mambaout_kobe.in1k)                                                  |224     |79.986|94.986|9.14       |
|[mambaout_femto.in1k](http://huggingface.co/timm/mambaout_femto.in1k)                                                |288     |79.848|95.14 |7.3        |
|[mambaout_femto.in1k](http://huggingface.co/timm/mambaout_femto.in1k)                                                |224     |78.87 |94.408|7.3        |

* SigLIP SO400M ViT fine-tunes on ImageNet-1k @ 378x378, added 378x378 option for existing SigLIP 384x384 models
  *  [vit_so400m_patch14_siglip_378.webli_ft_in1k](https://huggingface.co/timm/vit_so400m_patch14_siglip_378.webli_ft_in1k) - 89.42 top-1
  *  [vit_so400m_patch14_siglip_gap_378.webli_ft_in1k](https://huggingface.co/timm/vit_so400m_patch14_siglip_gap_378.webli_ft_in1k) - 89.03
* SigLIP SO400M ViT encoder from recent multi-lingual (i18n) variant, patch16 @ 256x256 (https://huggingface.co/timm/ViT-SO400M-16-SigLIP-i18n-256). OpenCLIP update pending.
* Add two ConvNeXt 'Zepto' models & weights (one w/ overlapped stem and one w/ patch stem). Uses RMSNorm, smaller than previous 'Atto', 2.2M params.
  * [convnext_zepto_rms_ols.ra4_e3600_r224_in1k](https://huggingface.co/timm/convnext_zepto_rms_ols.ra4_e3600_r224_in1k) - 73.20 top-1 @ 224
  * [convnext_zepto_rms.ra4_e3600_r224_in1k](https://huggingface.co/timm/convnext_zepto_rms.ra4_e3600_r224_in1k) - 72.81 @ 224

### Sept 2024
* Add a suite of tiny test models for improved unit tests and niche low-resource applications (https://huggingface.co/blog/rwightman/timm-tiny-test)
* Add MobileNetV4-Conv-Small (0.5x) model (https://huggingface.co/posts/rwightman/793053396198664)
  * [mobilenetv4_conv_small_050.e3000_r224_in1k](http://hf.co/timm/mobilenetv4_conv_small_050.e3000_r224_in1k) - 65.81 top-1 @ 256, 64.76 @ 224
* Add MobileNetV3-Large variants trained with MNV4 Small recipe
  * [mobilenetv3_large_150d.ra4_e3600_r256_in1k](http://hf.co/timm/mobilenetv3_large_150d.ra4_e3600_r256_in1k) - 81.81 @ 320, 80.94 @ 256
  * [mobilenetv3_large_100.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv3_large_100.ra4_e3600_r224_in1k) - 77.16 @ 256, 76.31 @ 224

### Aug 21, 2024
* Updated SBB ViT models trained on ImageNet-12k and fine-tuned on ImageNet-1k, challenging quite a number of much larger, slower models

| model | top1 | top5 | param_count | img_size |
| -------------------------------------------------- | ------ | ------ | ----------- | -------- |
| [vit_mediumd_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1k](https://huggingface.co/timm/vit_mediumd_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1k) | 87.438 | 98.256 | 64.11 | 384 |
| [vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1k](https://huggingface.co/timm/vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1k) | 86.608 | 97.934 | 64.11 | 256 |
| [vit_betwixt_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1k](https://huggingface.co/timm/vit_betwixt_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1k) | 86.594 | 98.02 | 60.4 | 384 |
| [vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1k](https://huggingface.co/timm/vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1k) | 85.734 | 97.61 | 60.4 | 256 |
* MobileNet-V1 1.25, EfficientNet-B1, & ResNet50-D weights w/ MNV4 baseline challenge recipe

| model                                                                                                                    | top1   | top5   | param_count | img_size |
|--------------------------------------------------------------------------------------------------------------------------|--------|--------|-------------|----------|
| [resnet50d.ra4_e3600_r224_in1k](http://hf.co/timm/resnet50d.ra4_e3600_r224_in1k)                                         | 81.838 | 95.922 | 25.58       | 288      |
| [efficientnet_b1.ra4_e3600_r240_in1k](http://hf.co/timm/efficientnet_b1.ra4_e3600_r240_in1k)                             | 81.440 | 95.700 | 7.79        | 288      |
| [resnet50d.ra4_e3600_r224_in1k](http://hf.co/timm/resnet50d.ra4_e3600_r224_in1k)                                         | 80.952 | 95.384 | 25.58       | 224      |
| [efficientnet_b1.ra4_e3600_r240_in1k](http://hf.co/timm/efficientnet_b1.ra4_e3600_r240_in1k)                             | 80.406 | 95.152 | 7.79        | 240      |
| [mobilenetv1_125.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv1_125.ra4_e3600_r224_in1k)                             | 77.600 | 93.804 | 6.27        | 256      |
| [mobilenetv1_125.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv1_125.ra4_e3600_r224_in1k)                             | 76.924 | 93.234 | 6.27        | 224      |

* Add SAM2 (HieraDet) backbone arch & weight loading support
* Add Hiera Small weights trained w/ abswin pos embed on in12k & fine-tuned on 1k

|model                            |top1  |top5  |param_count|
|---------------------------------|------|------|-----------|
|hiera_small_abswin_256.sbb2_e200_in12k_ft_in1k    |84.912|97.260|35.01      |
|hiera_small_abswin_256.sbb2_pd_e200_in12k_ft_in1k |84.560|97.106|35.01      |

### Aug 8, 2024
* Add RDNet ('DenseNets Reloaded', https://arxiv.org/abs/2403.19588), thanks [Donghyun Kim](https://github.com/dhkim0225)

### July 28, 2024
* Add `mobilenet_edgetpu_v2_m` weights w/ `ra4` mnv4-small based recipe. 80.1% top-1 @ 224 and 80.7 @ 256.
* Release 1.0.8

### July 26, 2024
* More MobileNet-v4 weights, ImageNet-12k pretrain w/ fine-tunes, and anti-aliased ConvLarge models

| model                                                                                            |top1  |top1_err|top5  |top5_err|param_count|img_size|
|--------------------------------------------------------------------------------------------------|------|--------|------|--------|-----------|--------|
| [mobilenetv4_conv_aa_large.e230_r448_in12k_ft_in1k](http://hf.co/timm/mobilenetv4_conv_aa_large.e230_r448_in12k_ft_in1k)|84.99 |15.01   |97.294|2.706   |32.59      |544     |
| [mobilenetv4_conv_aa_large.e230_r384_in12k_ft_in1k](http://hf.co/timm/mobilenetv4_conv_aa_large.e230_r384_in12k_ft_in1k)|84.772|15.228  |97.344|2.656   |32.59      |480     |
| [mobilenetv4_conv_aa_large.e230_r448_in12k_ft_in1k](http://hf.co/timm/mobilenetv4_conv_aa_large.e230_r448_in12k_ft_in1k)|84.64 |15.36   |97.114|2.886   |32.59      |448     |
| [mobilenetv4_conv_aa_large.e230_r384_in12k_ft_in1k](http://hf.co/timm/mobilenetv4_conv_aa_large.e230_r384_in12k_ft_in1k)|84.314|15.686  |97.102|2.898   |32.59      |384     |
| [mobilenetv4_conv_aa_large.e600_r384_in1k](http://hf.co/timm/mobilenetv4_conv_aa_large.e600_r384_in1k)     |83.824|16.176  |96.734|3.266   |32.59      |480     |
| [mobilenetv4_conv_aa_large.e600_r384_in1k](http://hf.co/timm/mobilenetv4_conv_aa_large.e600_r384_in1k)             |83.244|16.756  |96.392|3.608   |32.59      |384     |
| [mobilenetv4_hybrid_medium.e200_r256_in12k_ft_in1k](http://hf.co/timm/mobilenetv4_hybrid_medium.e200_r256_in12k_ft_in1k)|82.99 |17.01   |96.67 |3.33    |11.07      |320     |
| [mobilenetv4_hybrid_medium.e200_r256_in12k_ft_in1k](http://hf.co/timm/mobilenetv4_hybrid_medium.e200_r256_in12k_ft_in1k)|82.364|17.636  |96.256|3.744   |11.07      |256     |

* Impressive MobileNet-V1 and EfficientNet-B0 baseline challenges (https://huggingface.co/blog/rwightman/mobilenet-baselines)

| model                                                                                            |top1  |top1_err|top5  |top5_err|param_count|img_size|
|--------------------------------------------------------------------------------------------------|------|--------|------|--------|-----------|--------|
| [efficientnet_b0.ra4_e3600_r224_in1k](http://hf.co/timm/efficientnet_b0.ra4_e3600_r224_in1k)                       |79.364|20.636  |94.754|5.246   |5.29       |256     |
| [efficientnet_b0.ra4_e3600_r224_in1k](http://hf.co/timm/efficientnet_b0.ra4_e3600_r224_in1k)                       |78.584|21.416  |94.338|5.662   |5.29       |224     |
| [mobilenetv1_100h.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv1_100h.ra4_e3600_r224_in1k)                     |76.596|23.404  |93.272|6.728   |5.28       |256     |
| [mobilenetv1_100.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv1_100.ra4_e3600_r224_in1k)                       |76.094|23.906  |93.004|6.996   |4.23       |256     |
| [mobilenetv1_100h.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv1_100h.ra4_e3600_r224_in1k)                     |75.662|24.338  |92.504|7.496   |5.28       |224     |
| [mobilenetv1_100.ra4_e3600_r224_in1k](http://hf.co/timm/mobilenetv1_100.ra4_e3600_r224_in1k)                       |75.382|24.618  |92.312|7.688   |4.23       |224     |

* Prototype of `set_input_size()` added to vit and swin v1/v2 models to allow changing image size, patch size, window size after model creation.
* Improved support in swin for different size handling, in addition to `set_input_size`, `always_partition` and `strict_img_size` args have been added to `__init__` to allow more flexible input size constraints
* Fix out of order indices info for intermediate 'Getter' feature wrapper, check out of range indices for same.
* Add several `tiny` < .5M param models for testing that are actually trained on ImageNet-1k

|model                       |top1  |top1_err|top5  |top5_err|param_count|img_size|crop_pct|
|----------------------------|------|--------|------|--------|-----------|--------|--------|
|test_efficientnet.r160_in1k |47.156|52.844  |71.726|28.274  |0.36       |192     |1.0     |
|test_byobnet.r160_in1k      |46.698|53.302  |71.674|28.326  |0.46       |192     |1.0     |
|test_efficientnet.r160_in1k |46.426|53.574  |70.928|29.072  |0.36       |160     |0.875   |
|test_byobnet.r160_in1k      |45.378|54.622  |70.572|29.428  |0.46       |160     |0.875   |
|test_vit.r160_in1k|42.0  |58.0    |68.664|31.336  |0.37       |192     |1.0     |
|test_vit.r160_in1k|40.822|59.178  |67.212|32.788  |0.37       |160     |0.875   |

* Fix vit reg token init, thanks [Promisery](https://github.com/Promisery)
* Other misc fixes

### June 24, 2024
* 3 more MobileNetV4 hybrid weights with different MQA weight init scheme

| model                                                                                            |top1  |top1_err|top5  |top5_err|param_count|img_size|
|--------------------------------------------------------------------------------------------------|------|--------|------|--------|-----------|--------|
| [mobilenetv4_hybrid_large.ix_e600_r384_in1k](http://hf.co/timm/mobilenetv4_hybrid_large.ix_e600_r384_in1k) |84.356|15.644  |96.892 |3.108  |37.76      |448     |
| [mobilenetv4_hybrid_large.ix_e600_r384_in1k](http://hf.co/timm/mobilenetv4_hybrid_large.ix_e600_r384_in1k) |83.990|16.010  |96.702 |3.298  |37.76      |384     |
| [mobilenetv4_hybrid_medium.ix_e550_r384_in1k](http://hf.co/timm/mobilenetv4_hybrid_medium.ix_e550_r384_in1k)       |83.394|16.606  |96.760|3.240   |11.07      |448     |
| [mobilenetv4_hybrid_medium.ix_e550_r384_in1k](http://hf.co/timm/mobilenetv4_hybrid_medium.ix_e550_r384_in1k)       |82.968|17.032  |96.474|3.526   |11.07      |384     |
| [mobilenetv4_hybrid_medium.ix_e550_r256_in1k](http://hf.co/timm/mobilenetv4_hybrid_medium.ix_e550_r256_in1k)       |82.492|17.508  |96.278|3.722   |11.07      |320     |
| [mobilenetv4_hybrid_medium.ix_e550_r256_in1k](http://hf.co/timm/mobilenetv4_hybrid_medium.ix_e550_r256_in1k)       |81.446|18.554  |95.704|4.296   |11.07      |256     |
* florence2 weight loading in DaViT model

### June 12, 2024
* MobileNetV4 models and initial set of `timm` trained weights added:

| model                                                                                            |top1  |top1_err|top5  |top5_err|param_count|img_size|
|--------------------------------------------------------------------------------------------------|------|--------|------|--------|-----------|--------|
| [mobilenetv4_hybrid_large.e600_r384_in1k](http://hf.co/timm/mobilenetv4_hybrid_large.e600_r384_in1k) |84.266|15.734  |96.936 |3.064  |37.76      |448     |
| [mobilenetv4_hybrid_large.e600_r384_in1k](http://hf.co/timm/mobilenetv4_hybrid_large.e600_r384_in1k) |83.800|16.200  |96.770 |3.230  |37.76      |384     |
| [mobilenetv4_conv_large.e600_r384_in1k](http://hf.co/timm/mobilenetv4_conv_large.e600_r384_in1k) |83.392|16.608  |96.622 |3.378  |32.59      |448     |
| [mobilenetv4_conv_large.e600_r384_in1k](http://hf.co/timm/mobilenetv4_conv_large.e600_r384_in1k) |82.952|17.048  |96.266 |3.734  |32.59      |384     |
| [mobilenetv4_conv_large.e500_r256_in1k](http://hf.co/timm/mobilenetv4_conv_large.e500_r256_in1k) |82.674|17.326  |96.31 |3.69    |32.59      |320     |
| [mobilenetv4_conv_large.e500_r256_in1k](http://hf.co/timm/mobilenetv4_conv_large.e500_r256_in1k)                   |81.862|18.138  |95.69 |4.31    |32.59      |256     |
| [mobilenetv4_hybrid_medium.e500_r224_in1k](http://hf.co/timm/mobilenetv4_hybrid_medium.e500_r224_in1k)             |81.276|18.724  |95.742|4.258   |11.07      |256     |
| [mobilenetv4_conv_medium.e500_r256_in1k](http://hf.co/timm/mobilenetv4_conv_medium.e500_r256_in1k)                 |80.858|19.142  |95.768|4.232   |9.72       |320     |
| [mobilenetv4_hybrid_medium.e500_r224_in1k](http://hf.co/timm/mobilenetv4_hybrid_medium.e500_r224_in1k)             |80.442|19.558  |95.38 |4.62    |11.07      |224     |
| [mobilenetv4_conv_blur_medium.e500_r224_in1k](http://hf.co/timm/mobilenetv4_conv_blur_medium.e500_r224_in1k)       |80.142|19.858  |95.298|4.702   |9.72       |256     |
| [mobilenetv4_conv_medium.e500_r256_in1k](http://hf.co/timm/mobilenetv4_conv_medium.e500_r256_in1k)                 |79.928|20.072  |95.184|4.816   |9.72       |256     |
| [mobilenetv4_conv_medium.e500_r224_in1k](http://hf.co/timm/mobilenetv4_conv_medium.e500_r224_in1k)                 |79.808|20.192  |95.186|4.814   |9.72       |256     |
| [mobilenetv4_conv_blur_medium.e500_r224_in1k](http://hf.co/timm/mobilenetv4_conv_blur_medium.e500_r224_in1k)       |79.438|20.562  |94.932|5.068   |9.72       |224     |
| [mobilenetv4_conv_medium.e500_r224_in1k](http://hf.co/timm/mobilenetv4_conv_medium.e500_r224_in1k)                 |79.094|20.906  |94.77 |5.23    |9.72       |224     |
| [mobilenetv4_conv_small.e2400_r224_in1k](http://hf.co/timm/mobilenetv4_conv_small.e2400_r224_in1k)                 |74.616|25.384  |92.072|7.928   |3.77       |256     |
| [mobilenetv4_conv_small.e1200_r224_in1k](http://hf.co/timm/mobilenetv4_conv_small.e1200_r224_in1k)                 |74.292|25.708  |92.116|7.884   |3.77       |256     |
| [mobilenetv4_conv_small.e2400_r224_in1k](http://hf.co/timm/mobilenetv4_conv_small.e2400_r224_in1k)                 |73.756|26.244  |91.422|8.578   |3.77       |224     |
| [mobilenetv4_conv_small.e1200_r224_in1k](http://hf.co/timm/mobilenetv4_conv_small.e1200_r224_in1k)                 |73.454|26.546  |91.34 |8.66    |3.77       |224     |

* Apple MobileCLIP (https://arxiv.org/pdf/2311.17049, FastViT and ViT-B) image tower model support & weights added (part of OpenCLIP support).
* ViTamin (https://arxiv.org/abs/2404.02132) CLIP image tower model & weights added (part of OpenCLIP support).
* OpenAI CLIP Modified ResNet image tower modelling & weight support (via ByobNet). Refactor AttentionPool2d.

### May 14, 2024
* Support loading PaliGemma jax weights into SigLIP ViT models with average pooling.
* Add Hiera models from Meta (https://github.com/facebookresearch/hiera).
* Add `normalize=` flag for transforms, return non-normalized torch.Tensor with original dtype (for `chug`)
* Version 1.0.3 release

### May 11, 2024
* `Searching for Better ViT Baselines (For the GPU Poor)` weights and vit variants released. Exploring model shapes between Tiny and Base.

| model | top1 | top5 | param_count | img_size |
| -------------------------------------------------- | ------ | ------ | ----------- | -------- |
| [vit_mediumd_patch16_reg4_gap_256.sbb_in12k_ft_in1k](https://huggingface.co/timm/vit_mediumd_patch16_reg4_gap_256.sbb_in12k_ft_in1k) | 86.202 | 97.874 | 64.11 | 256 |
| [vit_betwixt_patch16_reg4_gap_256.sbb_in12k_ft_in1k](https://huggingface.co/timm/vit_betwixt_patch16_reg4_gap_256.sbb_in12k_ft_in1k)  | 85.418 | 97.48 | 60.4 | 256 |
| [vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1k](https://huggingface.co/timm/vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1k)  | 84.322 | 96.812 | 63.95 | 256 |
| [vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1k](https://huggingface.co/timm/vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1k)  | 83.906 | 96.684 | 60.23 | 256 |
| [vit_base_patch16_rope_reg1_gap_256.sbb_in1k](https://huggingface.co/timm/vit_base_patch16_rope_reg1_gap_256.sbb_in1k)  | 83.866 | 96.67 | 86.43 | 256 |
| [vit_medium_patch16_rope_reg1_gap_256.sbb_in1k](https://huggingface.co/timm/vit_medium_patch16_rope_reg1_gap_256.sbb_in1k)  | 83.81 | 96.824 | 38.74 | 256 |
| [vit_betwixt_patch16_reg4_gap_256.sbb_in1k](https://huggingface.co/timm/vit_betwixt_patch16_reg4_gap_256.sbb_in1k)  | 83.706 | 96.616 | 60.4 | 256 |
| [vit_betwixt_patch16_reg1_gap_256.sbb_in1k](https://huggingface.co/timm/vit_betwixt_patch16_reg1_gap_256.sbb_in1k)  | 83.628 | 96.544 | 60.4 | 256 |
| [vit_medium_patch16_reg4_gap_256.sbb_in1k](https://huggingface.co/timm/vit_medium_patch16_reg4_gap_256.sbb_in1k)  | 83.47 | 96.622 | 38.88 | 256 |
| [vit_medium_patch16_reg1_gap_256.sbb_in1k](https://huggingface.co/timm/vit_medium_patch16_reg1_gap_256.sbb_in1k)  | 83.462 | 96.548 | 38.88 | 256 |
| [vit_little_patch16_reg4_gap_256.sbb_in1k](https://huggingface.co/timm/vit_little_patch16_reg4_gap_256.sbb_in1k)  | 82.514 | 96.262 | 22.52 | 256 |
| [vit_wee_patch16_reg1_gap_256.sbb_in1k](https://huggingface.co/timm/vit_wee_patch16_reg1_gap_256.sbb_in1k)  | 80.256 | 95.360 | 13.42 | 256 |
| [vit_pwee_patch16_reg1_gap_256.sbb_in1k](https://huggingface.co/timm/vit_pwee_patch16_reg1_gap_256.sbb_in1k)  | 80.072 | 95.136 | 15.25 | 256 |
| [vit_mediumd_patch16_reg4_gap_256.sbb_in12k](https://huggingface.co/timm/vit_mediumd_patch16_reg4_gap_256.sbb_in12k) | N/A | N/A | 64.11 | 256 |
| [vit_betwixt_patch16_reg4_gap_256.sbb_in12k](https://huggingface.co/timm/vit_betwixt_patch16_reg4_gap_256.sbb_in12k)  | N/A | N/A | 60.4 | 256 |

* AttentionExtract helper added to extract attention maps from `timm` models. See example in https://github.com/huggingface/pytorch-image-models/discussions/1232#discussioncomment-9320949
* `forward_intermediates()` API refined and added to more models including some ConvNets that have other extraction methods.
* 1017 of 1047 model architectures support `features_only=True` feature extraction. Remaining 34 architectures can be supported but based on priority requests.
* Remove torch.jit.script annotated functions including old JIT activations. Conflict with dynamo and dynamo does a much better job when used.

### April 11, 2024
* Prepping for a long overdue 1.0 release, things have been stable for a while now.
* Significant feature that's been missing for a while, `features_only=True` support for ViT models with flat hidden states or non-std module layouts (so far covering  `'vit_*', 'twins_*', 'deit*', 'beit*', 'mvitv2*', 'eva*', 'samvit_*', 'flexivit*'`)
* Above feature support achieved through a new `forward_intermediates()` API that can be used with a feature wrapping module or directly.
```python
model = timm.create_model('vit_base_patch16_224')
final_feat, intermediates = model.forward_intermediates(input)
output = model.forward_head(final_feat)  # pooling + classifier head

print(final_feat.shape)
torch.Size([2, 197, 768])

for f in intermediates:
    print(f.shape)
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])
torch.Size([2, 768, 14, 14])

print(output.shape)
torch.Size([2, 1000])
```

```python
model = timm.create_model('eva02_base_patch16_clip_224', pretrained=True, img_size=512, features_only=True, out_indices=(-3, -2,))
output = model(torch.randn(2, 3, 512, 512))

for o in output:
    print(o.shape)
torch.Size([2, 768, 32, 32])
torch.Size([2, 768, 32, 32])
```
* TinyCLIP vision tower weights added, thx [Thien Tran](https://github.com/gau-nernst)

### Feb 19, 2024
* Next-ViT models added. Adapted from https://github.com/bytedance/Next-ViT
* HGNet and PP-HGNetV2 models added. Adapted from https://github.com/PaddlePaddle/PaddleClas by [SeeFun](https://github.com/seefun)
* Removed setup.py, moved to pyproject.toml based build supported by PDM
* Add updated model EMA impl using _for_each for less overhead
* Support device args in train script for non GPU devices
* Other misc fixes and small additions
* Min supported Python version increased to 3.8
* Release 0.9.16

### Jan 8, 2024
Datasets & transform refactoring
* HuggingFace streaming (iterable) dataset support (`--dataset hfids:org/dataset`)
* Webdataset wrapper tweaks for improved split info fetching, can auto fetch splits from supported HF hub webdataset
* Tested HF `datasets` and webdataset wrapper streaming from HF hub with recent `timm` ImageNet uploads to https://huggingface.co/timm
* Make input & target column/field keys consistent across datasets and pass via args
* Full monochrome support when using e.g. `--input-size 1 224 224` or `--in-chans 1`, sets PIL image conversion appropriately in dataset
* Improved several alternate crop & resize transforms (ResizeKeepRatio, RandomCropOrPad, etc) for use in PixParse document AI project
* Add SimCLR style color jitter prob along with grayscale and gaussian blur options to augmentations and args
* Allow train without validation set (`--val-split ''`) in train script
* Add `--bce-sum` (sum over class dim) and `--bce-pos-weight` (positive weighting) args for training as they're common BCE loss tweaks I was often hard coding

### Nov 23, 2023
* Added EfficientViT-Large models, thanks [SeeFun](https://github.com/seefun)
* Fix Python 3.7 compat, will be dropping support for it soon
* Other misc fixes
* Release 0.9.12

### Nov 20, 2023
* Added significant flexibility for Hugging Face Hub based timm models via `model_args` config entry. `model_args` will be passed as kwargs through to models on creation.
  * See example at https://huggingface.co/gaunernst/vit_base_patch16_1024_128.audiomae_as2m_ft_as20k/blob/main/config.json
  * Usage: https://github.com/huggingface/pytorch-image-models/discussions/2035
* Updated imagenet eval and test set csv files with latest models
* `vision_transformer.py` typing and doc cleanup by [Laureηt](https://github.com/Laurent2916)
* 0.9.11 release

### Nov 3, 2023
* [DFN (Data Filtering Networks)](https://huggingface.co/papers/2309.17425) and [MetaCLIP](https://huggingface.co/papers/2309.16671) ViT weights added
* DINOv2 'register' ViT model weights added (https://huggingface.co/papers/2309.16588, https://huggingface.co/papers/2304.07193)
* Add `quickgelu` ViT variants for OpenAI, DFN, MetaCLIP weights that use it (less efficient)
* Improved typing added to ResNet, MobileNet-v3 thanks to [Aryan](https://github.com/a-r-r-o-w)
* ImageNet-12k fine-tuned (from LAION-2B CLIP) `convnext_xxlarge`
* 0.9.9 release

### Oct 20, 2023
* [SigLIP](https://huggingface.co/papers/2303.15343) image tower weights supported in `vision_transformer.py`.
  * Great potential for fine-tune and downstream feature use.
* Experimental 'register' support in vit models as per [Vision Transformers Need Registers](https://huggingface.co/papers/2309.16588)
* Updated RepViT with new weight release. Thanks [wangao](https://github.com/jameslahm)
* Add patch resizing support (on pretrained weight load) to Swin models
* 0.9.8 release pending

### Sep 1, 2023
* TinyViT added by [SeeFun](https://github.com/seefun)
* Fix EfficientViT (MIT) to use torch.autocast so it works back to PT 1.10
* 0.9.7 release

### Aug 28, 2023
* Add dynamic img size support to models in `vision_transformer.py`, `vision_transformer_hybrid.py`, `deit.py`, and `eva.py` w/o breaking backward compat.
  * Add `dynamic_img_size=True` to args at model creation time to allow changing the grid size (interpolate abs and/or ROPE pos embed each forward pass).
  * Add `dynamic_img_pad=True` to allow image sizes that aren't divisible by patch size (pad bottom right to patch size each forward pass).
  * Enabling either dynamic mode will break FX tracing unless PatchEmbed module added as leaf.
  * Existing method of resizing position embedding by passing different `img_size` (interpolate pretrained embed weights once) on creation still works.
  * Existing method of changing `patch_size` (resize pretrained patch_embed weights once) on creation still works.
  * Example validation cmd `python validate.py --data-dir /imagenet --model vit_base_patch16_224 --amp --amp-dtype bfloat16 --img-size 255 --crop-pct 1.0 --model-kwargs dynamic_img_size=True dynamic_img_pad=True`

### Aug 25, 2023
* Many new models since last release
  * FastViT - https://arxiv.org/abs/2303.14189
  * MobileOne - https://arxiv.org/abs/2206.04040
  * InceptionNeXt - https://arxiv.org/abs/2303.16900
  * RepGhostNet - https://arxiv.org/abs/2211.06088 (thanks https://github.com/ChengpengChen)
  * GhostNetV2 - https://arxiv.org/abs/2211.12905 (thanks https://github.com/yehuitang)
  * EfficientViT (MSRA) - https://arxiv.org/abs/2305.07027 (thanks https://github.com/seefun)
  * EfficientViT (MIT) - https://arxiv.org/abs/2205.14756 (thanks https://github.com/seefun)
* Add `--reparam` arg to `benchmark.py`, `onnx_export.py`, and `validate.py` to trigger layer reparameterization / fusion for models with any one of `reparameterize()`, `switch_to_deploy()` or `fuse()`
  * Including FastViT, MobileOne, RepGhostNet, EfficientViT (MSRA), RepViT, RepVGG, and LeViT
* Preparing 0.9.6 'back to school' release

### Aug 11, 2023
* Swin, MaxViT, CoAtNet, and BEiT models support resizing of image/window size on creation with adaptation of pretrained weights
* Example validation cmd to test w/ non-square resize `python validate.py --data-dir /imagenet --model swin_base_patch4_window7_224.ms_in22k_ft_in1k --amp --amp-dtype bfloat16 --input-size 3 256 320 --model-kwargs window_size=8,10 img_size=256,320`

### Aug 3, 2023
* Add GluonCV weights for HRNet w18_small and w18_small_v2. Converted by [SeeFun](https://github.com/seefun)
* Fix `selecsls*` model naming regression
* Patch and position embedding for ViT/EVA works for bfloat16/float16 weights on load (or activations for on-the-fly resize)
* v0.9.5 release prep

### July 27, 2023
* Added timm trained `seresnextaa201d_32x8d.sw_in12k_ft_in1k_384` weights (and `.sw_in12k` pretrain) with 87.3% top-1 on ImageNet-1k, best ImageNet ResNet family model I'm aware of.
* RepViT model and weights (https://arxiv.org/abs/2307.09283) added by [wangao](https://github.com/jameslahm)
* I-JEPA ViT feature weights (no classifier) added by [SeeFun](https://github.com/seefun)
* SAM-ViT (segment anything) feature weights (no classifier) added by [SeeFun](https://github.com/seefun)
* Add support for alternative feat extraction methods and -ve indices to EfficientNet
* Add NAdamW optimizer
* Misc fixes

### May 11, 2023
* `timm` 0.9 released, transition from 0.8.xdev releases

### May 10, 2023
* Hugging Face Hub downloading is now default, 1132 models on https://huggingface.co/timm, 1163 weights in `timm`
* DINOv2 vit feature backbone weights added thanks to [Leng Yue](https://github.com/leng-yue)
* FB MAE vit feature backbone weights added
* OpenCLIP DataComp-XL L/14 feat backbone weights added
* MetaFormer (poolformer-v2, caformer, convformer, updated poolformer (v1)) w/ weights added by [Fredo Guan](https://github.com/fffffgggg54)
* Experimental `get_intermediate_layers` function on vit/deit models for grabbing hidden states (inspired by DINO impl). This is WIP and may change significantly... feedback welcome.
* Model creation throws error if `pretrained=True` and no weights exist (instead of continuing with random initialization)
* Fix regression with inception / nasnet TF sourced weights with 1001 classes in original classifiers
* bitsandbytes (https://github.com/TimDettmers/bitsandbytes) optimizers added to factory, use `bnb` prefix, ie `bnbadam8bit`
* Misc cleanup and fixes
* Final testing before switching to a 0.9 and bringing `timm` out of pre-release state

### April 27, 2023
* 97% of `timm` models uploaded to HF Hub and almost all updated to support multi-weight pretrained configs
* Minor cleanup and refactoring of another batch of models as multi-weight added. More fused_attn (F.sdpa) and features_only support, and torchscript fixes.

### April 21, 2023
* Gradient accumulation support added to train script and tested (`--grad-accum-steps`), thanks [Taeksang Kim](https://github.com/voidbag)
* More weights on HF Hub (cspnet, cait, volo, xcit, tresnet, hardcorenas, densenet, dpn, vovnet, xception_aligned)
* Added `--head-init-scale` and `--head-init-bias` to train.py to scale classifier head and set fixed bias for fine-tune
* Remove all InplaceABN (`inplace_abn`) use, replaced use in tresnet with standard BatchNorm (modified weights accordingly). 

### April 12, 2023
* Add ONNX export script, validate script, helpers that I've had kicking around for a long time. Tweak 'same' padding for better export w/ recent ONNX + pytorch.
* Refactor dropout args for vit and vit-like models, separate drop_rate into `drop_rate` (classifier dropout), `proj_drop_rate` (block mlp / out projections), `pos_drop_rate` (position embedding drop), `attn_drop_rate` (attention dropout). Also add patch dropout (FLIP) to vit and eva models.
* fused F.scaled_dot_product_attention support to more vit models, add env var (TIMM_FUSED_ATTN) to control, and config interface to enable/disable
* Add EVA-CLIP backbones w/ image tower weights, all the way up to 4B param 'enormous' model, and 336x336 OpenAI ViT mode that was missed.

### April 5, 2023
* ALL ResNet models pushed to Hugging Face Hub with multi-weight support
  * All past `timm` trained weights added with recipe based tags to differentiate
  * All ResNet strikes back A1/A2/A3 (seed 0) and R50 example B/C1/C2/D weights available
  * Add torchvision v2 recipe weights to existing torchvision originals
  * See comparison table in https://huggingface.co/timm/seresnextaa101d_32x8d.sw_in12k_ft_in1k_288#model-comparison
* New ImageNet-12k + ImageNet-1k fine-tunes available for a few anti-aliased ResNet models
  * `resnetaa50d.sw_in12k_ft_in1k` - 81.7 @ 224, 82.6 @ 288
  * `resnetaa101d.sw_in12k_ft_in1k` - 83.5 @ 224, 84.1 @ 288
  * `seresnextaa101d_32x8d.sw_in12k_ft_in1k` - 86.0 @ 224, 86.5 @ 288 
  * `seresnextaa101d_32x8d.sw_in12k_ft_in1k_288` - 86.5 @ 288, 86.7 @ 320

### March 31, 2023
* Add first ConvNext-XXLarge CLIP -> IN-1k fine-tune and IN-12k intermediate fine-tunes for convnext-base/large CLIP models.

| model                                                                                                                |top1  |top5  |img_size|param_count|gmacs |macts |
|----------------------------------------------------------------------------------------------------------------------|------|------|--------|-----------|------|------|
| [convnext_xxlarge.clip_laion2b_soup_ft_in1k](https://huggingface.co/timm/convnext_xxlarge.clip_laion2b_soup_ft_in1k) |88.612|98.704|256     |846.47     |198.09|124.45|
| convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384                                                               |88.312|98.578|384     |200.13     |101.11|126.74|
| convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_320                                                               |87.968|98.47 |320     |200.13     |70.21 |88.02 |
| convnext_base.clip_laion2b_augreg_ft_in12k_in1k_384                                                                  |87.138|98.212|384     |88.59      |45.21 |84.49 |
| convnext_base.clip_laion2b_augreg_ft_in12k_in1k                                                                      |86.344|97.97 |256     |88.59      |20.09 |37.55 |

* Add EVA-02 MIM pretrained and fine-tuned weights, push to HF hub and update model cards for all EVA models. First model over 90% top-1 (99% top-5)! Check out the original code & weights at https://github.com/baaivision/EVA for more details on their work blending MIM, CLIP w/ many model, dataset, and train recipe tweaks.

| model                                              |top1  |top5  |param_count|img_size|
|----------------------------------------------------|------|------|-----------|--------|
| [eva02_large_patch14_448.mim_m38m_ft_in22k_in1k](https://huggingface.co/timm/eva02_large_patch14_448.mim_m38m_ft_in22k_in1k) |90.054|99.042|305.08     |448     |
| eva02_large_patch14_448.mim_in22k_ft_in22k_in1k    |89.946|99.01 |305.08     |448     |
| eva_giant_patch14_560.m30m_ft_in22k_in1k           |89.792|98.992|1014.45    |560     |
| eva02_large_patch14_448.mim_in22k_ft_in1k          |89.626|98.954|305.08     |448     |
| eva02_large_patch14_448.mim_m38m_ft_in1k           |89.57 |98.918|305.08     |448     |
| eva_giant_patch14_336.m30m_ft_in22k_in1k           |89.56 |98.956|1013.01    |336     |
| eva_giant_patch14_336.clip_ft_in1k                 |89.466|98.82 |1013.01    |336     |
| eva_large_patch14_336.in22k_ft_in22k_in1k          |89.214|98.854|304.53     |336     |
| eva_giant_patch14_224.clip_ft_in1k                 |88.882|98.678|1012.56    |224     |
| eva02_base_patch14_448.mim_in22k_ft_in22k_in1k     |88.692|98.722|87.12      |448     |
| eva_large_patch14_336.in22k_ft_in1k                |88.652|98.722|304.53     |336     |
| eva_large_patch14_196.in22k_ft_in22k_in1k          |88.592|98.656|304.14     |196     |
| eva02_base_patch14_448.mim_in22k_ft_in1k           |88.23 |98.564|87.12      |448     |
| eva_large_patch14_196.in22k_ft_in1k                |87.934|98.504|304.14     |196     |
| eva02_small_patch14_336.mim_in22k_ft_in1k          |85.74 |97.614|22.13      |336     |
| eva02_tiny_patch14_336.mim_in22k_ft_in1k           |80.658|95.524|5.76       |336     |

* Multi-weight and HF hub for DeiT and MLP-Mixer based models

### March 22, 2023
* More weights pushed to HF hub along with multi-weight support, including: `regnet.py`, `rexnet.py`, `byobnet.py`, `resnetv2.py`, `swin_transformer.py`, `swin_transformer_v2.py`, `swin_transformer_v2_cr.py`
* Swin Transformer models support feature extraction (NCHW feat maps for `swinv2_cr_*`, and NHWC for all others) and spatial embedding outputs.
* FocalNet (from https://github.com/microsoft/FocalNet) models and weights added with significant refactoring, feature extraction, no fixed resolution / sizing constraint
* RegNet weights increased with HF hub push, SWAG, SEER, and torchvision v2 weights. SEER is pretty poor w.r.t. performance for its model size, but possibly useful.
* More ImageNet-12k pretrained and 1k fine-tuned `timm` weights:
  * `rexnetr_200.sw_in12k_ft_in1k` - 82.6 @ 224, 83.2 @ 288
  * `rexnetr_300.sw_in12k_ft_in1k` - 84.0 @ 224, 84.5 @ 288
  * `regnety_120.sw_in12k_ft_in1k` - 85.0 @ 224, 85.4 @ 288
  * `regnety_160.lion_in12k_ft_in1k` - 85.6 @ 224, 86.0 @ 288
  * `regnety_160.sw_in12k_ft_in1k` - 85.6 @ 224, 86.0 @ 288  (compare to SWAG PT + 1k FT this is same BUT much lower res, blows SEER FT away)
* Model name deprecation + remapping functionality added (a milestone for bringing 0.8.x out of pre-release). Mappings being added...
* Minor bug fixes and improvements.

### Feb 26, 2023
* Add ConvNeXt-XXLarge CLIP pretrained image tower weights for fine-tune & features (fine-tuning TBD) -- see [model card](https://huggingface.co/laion/CLIP-convnext_xxlarge-laion2B-s34B-b82K-augreg-soup)
* Update `convnext_xxlarge` default LayerNorm eps to 1e-5 (for CLIP weights, improved stability)
* 0.8.15dev0

### Feb 20, 2023
* Add 320x320 `convnext_large_mlp.clip_laion2b_ft_320` and `convnext_large_mlp.clip_laion2b_ft_soup_320` CLIP image tower weights for features & fine-tune
* 0.8.13dev0 pypi release for latest changes w/ move to huggingface org

### Feb 16, 2023
* `safetensor` checkpoint support added
* Add ideas from 'Scaling Vision Transformers to 22 Billion Parameters' (https://arxiv.org/abs/2302.05442) -- qk norm, RmsNorm, parallel block
* Add F.scaled_dot_product_attention support (PyTorch 2.0 only) to `vit_*`, `vit_relpos*`, `coatnet` / `maxxvit` (to start)
* Lion optimizer (w/ multi-tensor option) added (https://arxiv.org/abs/2302.06675)
* gradient checkpointing works with `features_only=True`

### Feb 7, 2023
* New inference benchmark numbers added in [results](results/) folder.
* Add convnext LAION CLIP trained weights and initial set of in1k fine-tunes
  * `convnext_base.clip_laion2b_augreg_ft_in1k` - 86.2% @ 256x256
  * `convnext_base.clip_laiona_augreg_ft_in1k_384` - 86.5% @ 384x384
  * `convnext_large_mlp.clip_laion2b_augreg_ft_in1k` - 87.3% @ 256x256
  * `convnext_large_mlp.clip_laion2b_augreg_ft_in1k_384` - 87.9% @ 384x384
* Add DaViT models. Supports `features_only=True`. Adapted from https://github.com/dingmyu/davit by [Fredo](https://github.com/fffffgggg54).
* Use a common NormMlpClassifierHead across MaxViT, ConvNeXt, DaViT
* Add EfficientFormer-V2 model, update EfficientFormer, and refactor LeViT (closely related architectures). Weights on HF hub.
  * New EfficientFormer-V2 arch, significant refactor from original at (https://github.com/snap-research/EfficientFormer). Supports `features_only=True`.
  * Minor updates to EfficientFormer.
  * Refactor LeViT models to stages, add `features_only=True` support to new `conv` variants, weight remap required.
* Move ImageNet meta-data (synsets, indices) from `/results` to [`timm/data/_info`](timm/data/_info/).
* Add ImageNetInfo / DatasetInfo classes to provide labelling for various ImageNet classifier layouts in `timm`
  * Update `inference.py` to use, try: `python inference.py --data-dir /folder/to/images --model convnext_small.in12k --label-type detail --topk 5`
* Ready for 0.8.10 pypi pre-release (final testing).

### Jan 20, 2023
* Add two convnext 12k -> 1k fine-tunes at 384x384
  * `convnext_tiny.in12k_ft_in1k_384` - 85.1 @ 384
  * `convnext_small.in12k_ft_in1k_384` - 86.2 @ 384

* Push all MaxxViT weights to HF hub, and add new ImageNet-12k -> 1k fine-tunes for `rw` base MaxViT and CoAtNet 1/2 models

|model                                                                                                                   |top1 |top5 |samples / sec  |Params (M)     |GMAC  |Act (M)|
|------------------------------------------------------------------------------------------------------------------------|----:|----:|--------------:|--------------:|-----:|------:|
|[maxvit_xlarge_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k)                    |88.53|98.64|          21.76|         475.77|534.14|1413.22|
|[maxvit_xlarge_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k)                    |88.32|98.54|          42.53|         475.32|292.78| 668.76|
|[maxvit_base_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k)                        |88.20|98.53|          50.87|         119.88|138.02| 703.99|
|[maxvit_large_tf_512.in21k_ft_in1k](https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k)                      |88.04|98.40|          36.42|         212.33|244.75| 942.15|
|[maxvit_large_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k)                      |87.98|98.56|          71.75|         212.03|132.55| 445.84|
|[maxvit_base_tf_384.in21k_ft_in1k](https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k)                        |87.92|98.54|         104.71|         119.65| 73.80| 332.90|
|[maxvit_rmlp_base_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/maxvit_rmlp_base_rw_384.sw_in12k_ft_in1k)        |87.81|98.37|         106.55|         116.14| 70.97| 318.95|
|[maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1k)  |87.47|98.37|         149.49|         116.09| 72.98| 213.74|
|[coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k)            |87.39|98.31|         160.80|          73.88| 47.69| 209.43|
|[maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k)        |86.89|98.02|         375.86|         116.14| 23.15|  92.64|
|[maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k)  |86.64|98.02|         501.03|         116.09| 24.20|  62.77|
|[maxvit_base_tf_512.in1k](https://huggingface.co/timm/maxvit_base_tf_512.in1k)                                          |86.60|97.92|          50.75|         119.88|138.02| 703.99|
|[coatnet_2_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_2_rw_224.sw_in12k_ft_in1k)                      |86.57|97.89|         631.88|          73.87| 15.09|  49.22|
|[maxvit_large_tf_512.in1k](https://huggingface.co/timm/maxvit_large_tf_512.in1k)                                        |86.52|97.88|          36.04|         212.33|244.75| 942.15|
|[coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k)            |86.49|97.90|         620.58|          73.88| 15.18|  54.78|
|[maxvit_base_tf_384.in1k](https://huggingface.co/timm/maxvit_base_tf_384.in1k)                                          |86.29|97.80|         101.09|         119.65| 73.80| 332.90|
|[maxvit_large_tf_384.in1k](https://huggingface.co/timm/maxvit_large_tf_384.in1k)                                        |86.23|97.69|          70.56|         212.03|132.55| 445.84|
|[maxvit_small_tf_512.in1k](https://huggingface.co/timm/maxvit_small_tf_512.in1k)                                        |86.10|97.76|          88.63|          69.13| 67.26| 383.77|
|[maxvit_tiny_tf_512.in1k](https://huggingface.co/timm/maxvit_tiny_tf_512.in1k)                                          |85.67|97.58|         144.25|          31.05| 33.49| 257.59|
|[maxvit_small_tf_384.in1k](https://huggingface.co/timm/maxvit_small_tf_384.in1k)                                        |85.54|97.46|         188.35|          69.02| 35.87| 183.65|
|[maxvit_tiny_tf_384.in1k](https://huggingface.co/timm/maxvit_tiny_tf_384.in1k)                                          |85.11|97.38|         293.46|          30.98| 17.53| 123.42|
|[maxvit_large_tf_224.in1k](https://huggingface.co/timm/maxvit_large_tf_224.in1k)                                        |84.93|96.97|         247.71|         211.79| 43.68| 127.35|
|[coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k](https://huggingface.co/timm/coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k)          |84.90|96.96|        1025.45|          41.72|  8.11|  40.13|
|[maxvit_base_tf_224.in1k](https://huggingface.co/timm/maxvit_base_tf_224.in1k)                                          |84.85|96.99|         358.25|         119.47| 24.04|  95.01|
|[maxxvit_rmlp_small_rw_256.sw_in1k](https://huggingface.co/timm/maxxvit_rmlp_small_rw_256.sw_in1k)                      |84.63|97.06|         575.53|          66.01| 14.6
Download .txt
gitextract_0n5u8q1i/

├── .gitattributes
├── .github/
│   ├── FUNDING.yml
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   ├── config.yml
│   │   └── feature_request.md
│   └── workflows/
│       ├── build_documentation.yml
│       ├── build_pr_documentation.yml
│       ├── tests.yml
│       ├── trufflehog.yml
│       └── upload_pr_documentation.yml
├── .gitignore
├── CITATION.cff
├── CLAUDE.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── UPGRADING.md
├── avg_checkpoints.py
├── benchmark.py
├── bulk_runner.py
├── clean_checkpoint.py
├── convert/
│   ├── convert_from_mxnet.py
│   └── convert_nest_flax.py
├── distributed_train.sh
├── hfdocs/
│   ├── README.md
│   └── source/
│       ├── _toctree.yml
│       ├── changes.mdx
│       ├── feature_extraction.mdx
│       ├── hf_hub.mdx
│       ├── hparams.mdx
│       ├── index.mdx
│       ├── installation.mdx
│       ├── models/
│       │   ├── adversarial-inception-v3.mdx
│       │   ├── advprop.mdx
│       │   ├── big-transfer.mdx
│       │   ├── csp-darknet.mdx
│       │   ├── csp-resnet.mdx
│       │   ├── csp-resnext.mdx
│       │   ├── densenet.mdx
│       │   ├── dla.mdx
│       │   ├── dpn.mdx
│       │   ├── ecaresnet.mdx
│       │   ├── efficientnet-pruned.mdx
│       │   ├── efficientnet.mdx
│       │   ├── ensemble-adversarial.mdx
│       │   ├── ese-vovnet.mdx
│       │   ├── fbnet.mdx
│       │   ├── gloun-inception-v3.mdx
│       │   ├── gloun-resnet.mdx
│       │   ├── gloun-resnext.mdx
│       │   ├── gloun-senet.mdx
│       │   ├── gloun-seresnext.mdx
│       │   ├── gloun-xception.mdx
│       │   ├── hrnet.mdx
│       │   ├── ig-resnext.mdx
│       │   ├── inception-resnet-v2.mdx
│       │   ├── inception-v3.mdx
│       │   ├── inception-v4.mdx
│       │   ├── legacy-se-resnet.mdx
│       │   ├── legacy-se-resnext.mdx
│       │   ├── legacy-senet.mdx
│       │   ├── mixnet.mdx
│       │   ├── mnasnet.mdx
│       │   ├── mobilenet-v2.mdx
│       │   ├── mobilenet-v3.mdx
│       │   ├── nasnet.mdx
│       │   ├── noisy-student.mdx
│       │   ├── pnasnet.mdx
│       │   ├── regnetx.mdx
│       │   ├── regnety.mdx
│       │   ├── res2net.mdx
│       │   ├── res2next.mdx
│       │   ├── resnest.mdx
│       │   ├── resnet-d.mdx
│       │   ├── resnet.mdx
│       │   ├── resnext.mdx
│       │   ├── rexnet.mdx
│       │   ├── se-resnet.mdx
│       │   ├── selecsls.mdx
│       │   ├── seresnext.mdx
│       │   ├── skresnet.mdx
│       │   ├── skresnext.mdx
│       │   ├── spnasnet.mdx
│       │   ├── ssl-resnet.mdx
│       │   ├── swsl-resnet.mdx
│       │   ├── swsl-resnext.mdx
│       │   ├── tf-efficientnet-condconv.mdx
│       │   ├── tf-efficientnet-lite.mdx
│       │   ├── tf-efficientnet.mdx
│       │   ├── tf-inception-v3.mdx
│       │   ├── tf-mixnet.mdx
│       │   ├── tf-mobilenet-v3.mdx
│       │   ├── tresnet.mdx
│       │   ├── wide-resnet.mdx
│       │   └── xception.mdx
│       ├── models.mdx
│       ├── quickstart.mdx
│       ├── reference/
│       │   ├── data.mdx
│       │   ├── models.mdx
│       │   ├── optimizers.mdx
│       │   └── schedulers.mdx
│       ├── results.mdx
│       └── training_script.mdx
├── hubconf.py
├── inference.py
├── onnx_export.py
├── onnx_validate.py
├── pyproject.toml
├── requirements-dev.txt
├── requirements.txt
├── results/
│   ├── README.md
│   ├── benchmark-infer-amp-nchw-pt113-cu117-rtx3090.csv
│   ├── benchmark-infer-amp-nchw-pt210-cu121-rtx3090.csv
│   ├── benchmark-infer-amp-nchw-pt240-cu124-rtx3090.csv
│   ├── benchmark-infer-amp-nchw-pt240-cu124-rtx4090-dynamo.csv
│   ├── benchmark-infer-amp-nchw-pt240-cu124-rtx4090.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu128-4090-dynamo.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu128-4090.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu130-5090-dynamo.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu130-5090.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu130-pro6000maxq-dynamo.csv
│   ├── benchmark-infer-amp-nchw-pt291-cu130-pro6000maxq.csv
│   ├── benchmark-infer-amp-nhwc-pt113-cu117-rtx3090.csv
│   ├── benchmark-infer-amp-nhwc-pt210-cu121-rtx3090.csv
│   ├── benchmark-infer-amp-nhwc-pt240-cu124-rtx3090.csv
│   ├── benchmark-infer-amp-nhwc-pt240-cu124-rtx4090.csv
│   ├── benchmark-infer-amp_bf16-nchw-pt291-cu130-pro6000maxq-dynamo.csv
│   ├── benchmark-infer-bf16-nchw-pt291-cu130-pro6000maxq-dynamo.csv
│   ├── benchmark-infer-fp32-nchw-pt221-cpu-i9_10940x-dynamo.csv
│   ├── benchmark-infer-fp32-nchw-pt240-cpu-i7_12700h-dynamo.csv
│   ├── benchmark-infer-fp32-nchw-pt240-cpu-i9_10940x-dynamo.csv
│   ├── benchmark-train-amp-nchw-pt112-cu113-rtx3090.csv
│   ├── benchmark-train-amp-nhwc-pt112-cu113-rtx3090.csv
│   ├── generate_csv_results.py
│   ├── model_metadata-in1k.csv
│   ├── results-imagenet-a-clean.csv
│   ├── results-imagenet-a.csv
│   ├── results-imagenet-r-clean.csv
│   ├── results-imagenet-r.csv
│   ├── results-imagenet-real.csv
│   ├── results-imagenet.csv
│   ├── results-imagenetv2-matched-frequency.csv
│   └── results-sketch.csv
├── setup.cfg
├── tests/
│   ├── __init__.py
│   ├── test_checkpoint_loading.py
│   ├── test_layers.py
│   ├── test_layers_drop.py
│   ├── test_layers_pool.py
│   ├── test_models.py
│   ├── test_optim.py
│   ├── test_scheduler.py
│   └── test_utils.py
├── timm/
│   ├── __init__.py
│   ├── data/
│   │   ├── __init__.py
│   │   ├── _info/
│   │   │   ├── imagenet12k_synsets.txt
│   │   │   ├── imagenet21k_goog_synsets.txt
│   │   │   ├── imagenet21k_goog_to_12k_indices.txt
│   │   │   ├── imagenet21k_goog_to_22k_indices.txt
│   │   │   ├── imagenet21k_miil_synsets.txt
│   │   │   ├── imagenet21k_miil_w21_synsets.txt
│   │   │   ├── imagenet22k_ms_synsets.txt
│   │   │   ├── imagenet22k_ms_to_12k_indices.txt
│   │   │   ├── imagenet22k_ms_to_22k_indices.txt
│   │   │   ├── imagenet22k_synsets.txt
│   │   │   ├── imagenet22k_to_12k_indices.txt
│   │   │   ├── imagenet_a_indices.txt
│   │   │   ├── imagenet_a_synsets.txt
│   │   │   ├── imagenet_r_indices.txt
│   │   │   ├── imagenet_r_synsets.txt
│   │   │   ├── imagenet_real_labels.json
│   │   │   ├── imagenet_synset_to_definition.txt
│   │   │   ├── imagenet_synset_to_lemma.txt
│   │   │   ├── imagenet_synsets.txt
│   │   │   ├── mini_imagenet_indices.txt
│   │   │   └── mini_imagenet_synsets.txt
│   │   ├── auto_augment.py
│   │   ├── config.py
│   │   ├── constants.py
│   │   ├── dataset.py
│   │   ├── dataset_factory.py
│   │   ├── dataset_info.py
│   │   ├── distributed_sampler.py
│   │   ├── imagenet_info.py
│   │   ├── loader.py
│   │   ├── mixup.py
│   │   ├── naflex_dataset.py
│   │   ├── naflex_loader.py
│   │   ├── naflex_mixup.py
│   │   ├── naflex_random_erasing.py
│   │   ├── naflex_transforms.py
│   │   ├── random_erasing.py
│   │   ├── readers/
│   │   │   ├── __init__.py
│   │   │   ├── class_map.py
│   │   │   ├── img_extensions.py
│   │   │   ├── reader.py
│   │   │   ├── reader_factory.py
│   │   │   ├── reader_hfds.py
│   │   │   ├── reader_hfids.py
│   │   │   ├── reader_image_folder.py
│   │   │   ├── reader_image_in_tar.py
│   │   │   ├── reader_image_tar.py
│   │   │   ├── reader_tfds.py
│   │   │   ├── reader_wds.py
│   │   │   └── shared_count.py
│   │   ├── real_labels.py
│   │   ├── tf_preprocessing.py
│   │   ├── transforms.py
│   │   └── transforms_factory.py
│   ├── layers/
│   │   ├── __init__.py
│   │   ├── _fx.py
│   │   ├── activations.py
│   │   ├── activations_me.py
│   │   ├── adaptive_avgmax_pool.py
│   │   ├── attention.py
│   │   ├── attention2d.py
│   │   ├── attention_pool.py
│   │   ├── attention_pool2d.py
│   │   ├── blur_pool.py
│   │   ├── bottleneck_attn.py
│   │   ├── cbam.py
│   │   ├── classifier.py
│   │   ├── cond_conv2d.py
│   │   ├── config.py
│   │   ├── conv2d_same.py
│   │   ├── conv_bn_act.py
│   │   ├── coord_attn.py
│   │   ├── create_act.py
│   │   ├── create_attn.py
│   │   ├── create_conv2d.py
│   │   ├── create_norm.py
│   │   ├── create_norm_act.py
│   │   ├── diff_attention.py
│   │   ├── drop.py
│   │   ├── eca.py
│   │   ├── evo_norm.py
│   │   ├── fast_norm.py
│   │   ├── filter_response_norm.py
│   │   ├── format.py
│   │   ├── gather_excite.py
│   │   ├── global_context.py
│   │   ├── grid.py
│   │   ├── grn.py
│   │   ├── halo_attn.py
│   │   ├── helpers.py
│   │   ├── hybrid_embed.py
│   │   ├── inplace_abn.py
│   │   ├── interpolate.py
│   │   ├── lambda_layer.py
│   │   ├── layer_scale.py
│   │   ├── linear.py
│   │   ├── median_pool.py
│   │   ├── mixed_conv2d.py
│   │   ├── ml_decoder.py
│   │   ├── mlp.py
│   │   ├── non_local_attn.py
│   │   ├── norm.py
│   │   ├── norm_act.py
│   │   ├── other_pool.py
│   │   ├── padding.py
│   │   ├── patch_dropout.py
│   │   ├── patch_embed.py
│   │   ├── pool1d.py
│   │   ├── pool2d_same.py
│   │   ├── pos_embed.py
│   │   ├── pos_embed_rel.py
│   │   ├── pos_embed_sincos.py
│   │   ├── selective_kernel.py
│   │   ├── separable_conv.py
│   │   ├── space_to_depth.py
│   │   ├── split_attn.py
│   │   ├── split_batchnorm.py
│   │   ├── squeeze_excite.py
│   │   ├── std_conv.py
│   │   ├── test_time_pool.py
│   │   ├── trace_utils.py
│   │   ├── typing.py
│   │   └── weight_init.py
│   ├── loss/
│   │   ├── __init__.py
│   │   ├── asymmetric_loss.py
│   │   ├── binary_cross_entropy.py
│   │   ├── cross_entropy.py
│   │   └── jsd.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── _builder.py
│   │   ├── _efficientnet_blocks.py
│   │   ├── _efficientnet_builder.py
│   │   ├── _factory.py
│   │   ├── _features.py
│   │   ├── _features_fx.py
│   │   ├── _helpers.py
│   │   ├── _hub.py
│   │   ├── _manipulate.py
│   │   ├── _pretrained.py
│   │   ├── _prune.py
│   │   ├── _pruned/
│   │   │   ├── ecaresnet101d_pruned.txt
│   │   │   ├── ecaresnet50d_pruned.txt
│   │   │   ├── efficientnet_b1_pruned.txt
│   │   │   ├── efficientnet_b2_pruned.txt
│   │   │   └── efficientnet_b3_pruned.txt
│   │   ├── _registry.py
│   │   ├── beit.py
│   │   ├── byoanet.py
│   │   ├── byobnet.py
│   │   ├── cait.py
│   │   ├── coat.py
│   │   ├── convit.py
│   │   ├── convmixer.py
│   │   ├── convnext.py
│   │   ├── crossvit.py
│   │   ├── csatv2.py
│   │   ├── cspnet.py
│   │   ├── davit.py
│   │   ├── deit.py
│   │   ├── densenet.py
│   │   ├── dla.py
│   │   ├── dpn.py
│   │   ├── edgenext.py
│   │   ├── efficientformer.py
│   │   ├── efficientformer_v2.py
│   │   ├── efficientnet.py
│   │   ├── efficientvit_mit.py
│   │   ├── efficientvit_msra.py
│   │   ├── eva.py
│   │   ├── factory.py
│   │   ├── fasternet.py
│   │   ├── fastvit.py
│   │   ├── features.py
│   │   ├── focalnet.py
│   │   ├── fx_features.py
│   │   ├── gcvit.py
│   │   ├── ghostnet.py
│   │   ├── hardcorenas.py
│   │   ├── helpers.py
│   │   ├── hgnet.py
│   │   ├── hiera.py
│   │   ├── hieradet_sam2.py
│   │   ├── hrnet.py
│   │   ├── hub.py
│   │   ├── inception_next.py
│   │   ├── inception_resnet_v2.py
│   │   ├── inception_v3.py
│   │   ├── inception_v4.py
│   │   ├── layers/
│   │   │   └── __init__.py
│   │   ├── levit.py
│   │   ├── mambaout.py
│   │   ├── maxxvit.py
│   │   ├── metaformer.py
│   │   ├── mlp_mixer.py
│   │   ├── mobilenetv3.py
│   │   ├── mobilenetv5.py
│   │   ├── mobilevit.py
│   │   ├── mvitv2.py
│   │   ├── naflexvit.py
│   │   ├── nasnet.py
│   │   ├── nest.py
│   │   ├── nextvit.py
│   │   ├── nfnet.py
│   │   ├── pit.py
│   │   ├── pnasnet.py
│   │   ├── pvt_v2.py
│   │   ├── rdnet.py
│   │   ├── registry.py
│   │   ├── regnet.py
│   │   ├── repghost.py
│   │   ├── repvit.py
│   │   ├── res2net.py
│   │   ├── resnest.py
│   │   ├── resnet.py
│   │   ├── resnetv2.py
│   │   ├── rexnet.py
│   │   ├── selecsls.py
│   │   ├── senet.py
│   │   ├── sequencer.py
│   │   ├── shvit.py
│   │   ├── sknet.py
│   │   ├── starnet.py
│   │   ├── swiftformer.py
│   │   ├── swin_transformer.py
│   │   ├── swin_transformer_v2.py
│   │   ├── swin_transformer_v2_cr.py
│   │   ├── tiny_vit.py
│   │   ├── tnt.py
│   │   ├── tresnet.py
│   │   ├── twins.py
│   │   ├── vgg.py
│   │   ├── visformer.py
│   │   ├── vision_transformer.py
│   │   ├── vision_transformer_hybrid.py
│   │   ├── vision_transformer_relpos.py
│   │   ├── vision_transformer_sam.py
│   │   ├── vitamin.py
│   │   ├── volo.py
│   │   ├── vovnet.py
│   │   ├── xception.py
│   │   ├── xception_aligned.py
│   │   └── xcit.py
│   ├── optim/
│   │   ├── __init__.py
│   │   ├── _optim_factory.py
│   │   ├── _param_groups.py
│   │   ├── _types.py
│   │   ├── adabelief.py
│   │   ├── adafactor.py
│   │   ├── adafactor_bv.py
│   │   ├── adahessian.py
│   │   ├── adamp.py
│   │   ├── adamw.py
│   │   ├── adan.py
│   │   ├── adopt.py
│   │   ├── kron.py
│   │   ├── lamb.py
│   │   ├── laprop.py
│   │   ├── lars.py
│   │   ├── lion.py
│   │   ├── lookahead.py
│   │   ├── madgrad.py
│   │   ├── mars.py
│   │   ├── muon.py
│   │   ├── nadam.py
│   │   ├── nadamw.py
│   │   ├── nvnovograd.py
│   │   ├── optim_factory.py
│   │   ├── radam.py
│   │   ├── rmsprop_tf.py
│   │   ├── sgdp.py
│   │   └── sgdw.py
│   ├── py.typed
│   ├── scheduler/
│   │   ├── __init__.py
│   │   ├── cosine_lr.py
│   │   ├── multistep_lr.py
│   │   ├── plateau_lr.py
│   │   ├── poly_lr.py
│   │   ├── scheduler.py
│   │   ├── scheduler_factory.py
│   │   ├── step_lr.py
│   │   └── tanh_lr.py
│   ├── task/
│   │   ├── __init__.py
│   │   ├── classification.py
│   │   ├── distillation.py
│   │   ├── task.py
│   │   └── token_distillation.py
│   ├── utils/
│   │   ├── __init__.py
│   │   ├── agc.py
│   │   ├── attention_extract.py
│   │   ├── checkpoint_saver.py
│   │   ├── clip_grad.py
│   │   ├── cuda.py
│   │   ├── decay_batch.py
│   │   ├── distributed.py
│   │   ├── jit.py
│   │   ├── log.py
│   │   ├── metrics.py
│   │   ├── misc.py
│   │   ├── model.py
│   │   ├── model_ema.py
│   │   ├── onnx.py
│   │   ├── random.py
│   │   └── summary.py
│   └── version.py
├── train.py
└── validate.py
Download .txt
Showing preview only (508K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (6238 symbols across 279 files)

FILE: avg_checkpoints.py
  function checkpoint_metric (line 47) | def checkpoint_metric(checkpoint_path):
  function main (line 62) | def main():

FILE: benchmark.py
  function timestamp (line 149) | def timestamp(sync=False):
  function cuda_timestamp (line 153) | def cuda_timestamp(sync=False, device=None):
  function count_params (line 159) | def count_params(model: nn.Module):
  function resolve_precision (line 163) | def resolve_precision(precision: str):
  function profile_deepspeed (line 181) | def profile_deepspeed(model, input_size=(3, 224, 224), batch_size=1, det...
  function profile_fvcore (line 194) | def profile_fvcore(model, input_size=(3, 224, 224), batch_size=1, detail...
  class BenchmarkRunner (line 207) | class BenchmarkRunner:
    method __init__ (line 208) | def __init__(
    method _init_input (line 286) | def _init_input(self):
  class InferenceBenchmarkRunner (line 293) | class InferenceBenchmarkRunner(BenchmarkRunner):
    method __init__ (line 295) | def __init__(
    method run (line 305) | def run(self):
  class TrainBenchmarkRunner (line 368) | class TrainBenchmarkRunner(BenchmarkRunner):
    method __init__ (line 370) | def __init__(
    method _gen_target (line 391) | def _gen_target(self, batch_size):
    method run (line 395) | def run(self):
  class ProfileRunner (line 494) | class ProfileRunner(BenchmarkRunner):
    method __init__ (line 496) | def __init__(self, model_name, device='cuda', profiler='', **kwargs):
    method run (line 507) | def run(self):
  function _try_run (line 534) | def _try_run(
  function benchmark (line 564) | def benchmark(args):
  function main (line 620) | def main():
  function write_results (line 675) | def write_results(results_file, results, format='csv'):

FILE: bulk_runner.py
  function cmd_from_args (line 73) | def cmd_from_args(args) -> Tuple[Union[Callable, str], List[str]]:
  function _get_model_cfgs (line 96) | def _get_model_cfgs(
  function main (line 141) | def main():
  function write_results (line 234) | def write_results(results_file, results):

FILE: clean_checkpoint.py
  function main (line 38) | def main():
  function clean_checkpoint (line 55) | def clean_checkpoint(

FILE: convert/convert_from_mxnet.py
  function convert (line 15) | def convert(mxnet_name, torch_name):
  function map_mx_to_torch_model (line 74) | def map_mx_to_torch_model(mx_name):
  function main (line 93) | def main():

FILE: convert/convert_nest_flax.py
  function convert_nest (line 21) | def convert_nest(checkpoint_path, arch):

FILE: inference.py
  function main (line 150) | def main():
  function save_results (line 353) | def save_results(df, results_filename, results_format='csv', filename_co...

FILE: onnx_export.py
  function main (line 65) | def main():

FILE: onnx_validate.py
  function main (line 43) | def main():
  function accuracy_np (line 102) | def accuracy_np(output, target):

FILE: results/generate_csv_results.py
  function diff (line 20) | def diff(base_df, test_csv):

FILE: tests/test_checkpoint_loading.py
  class _CustomPayload (line 14) | class _CustomPayload:
    method __init__ (line 15) | def __init__(self, value: int = 1):
  function test_weights_only_allows_argparse_namespace (line 23) | def test_weights_only_allows_argparse_namespace(tmp_path):
  function test_weights_only_blocks_non_allowlisted_globals (line 36) | def test_weights_only_blocks_non_allowlisted_globals(tmp_path):
  function test_resume_checkpoint_default_weights_only_namespace (line 52) | def test_resume_checkpoint_default_weights_only_namespace(tmp_path):
  function test_resume_checkpoint_blocks_non_allowlisted_globals (line 82) | def test_resume_checkpoint_blocks_non_allowlisted_globals(tmp_path):
  function test_resume_checkpoint_weights_only_false_allows_custom_globals (line 95) | def test_resume_checkpoint_weights_only_false_allows_custom_globals(tmp_...

FILE: tests/test_layers.py
  class MLP (line 15) | class MLP(nn.Module):
    method __init__ (line 16) | def __init__(self, act_layer="relu", inplace=True):
    method forward (line 22) | def forward(self, x):
  function _run_act_layer_grad (line 29) | def _run_act_layer_grad(act_type, inplace=True):
  function test_swish_grad (line 57) | def test_swish_grad():
  function test_mish_grad (line 62) | def test_mish_grad():
  function test_hard_sigmoid_grad (line 67) | def test_hard_sigmoid_grad():
  function test_hard_swish_grad (line 72) | def test_hard_swish_grad():
  function test_hard_mish_grad (line 77) | def test_hard_mish_grad():
  function test_get_act_layer_empty_string (line 81) | def test_get_act_layer_empty_string():
  function test_create_act_layer_inplace_error (line 86) | def test_create_act_layer_inplace_error():
  function test_create_act_layer_edge_cases (line 98) | def test_create_act_layer_edge_cases():
  function test_get_act_fn_callable (line 113) | def test_get_act_fn_callable():
  function test_get_act_fn_none (line 119) | def test_get_act_fn_none():
  function test_mqa_v2 (line 127) | def test_mqa_v2(dim, dim_out, use_m):
  function test_attn2d (line 145) | def test_attn2d(bias, expand_first, head_first, attn_mask):

FILE: tests/test_layers_drop.py
  class TestDropBlock2d (line 8) | class TestDropBlock2d:
    method test_drop_block_2d_output_shape (line 11) | def test_drop_block_2d_output_shape(self):
    method test_drop_block_2d_no_drop_when_prob_zero (line 18) | def test_drop_block_2d_no_drop_when_prob_zero(self):
    method test_drop_block_2d_approximate_keep_ratio (line 24) | def test_drop_block_2d_approximate_keep_ratio(self):
    method test_drop_block_2d_inplace (line 43) | def test_drop_block_2d_inplace(self):
    method test_drop_block_2d_couple_channels_true (line 51) | def test_drop_block_2d_couple_channels_true(self):
    method test_drop_block_2d_couple_channels_false (line 64) | def test_drop_block_2d_couple_channels_false(self):
    method test_drop_block_2d_with_noise (line 77) | def test_drop_block_2d_with_noise(self):
    method test_drop_block_2d_even_block_size (line 88) | def test_drop_block_2d_even_block_size(self):
    method test_drop_block_2d_asymmetric_input (line 95) | def test_drop_block_2d_asymmetric_input(self):
    method test_drop_block_2d_scale_by_keep (line 102) | def test_drop_block_2d_scale_by_keep(self):
  class TestDropBlock2dModule (line 122) | class TestDropBlock2dModule:
    method test_deprecated_args_accepted (line 125) | def test_deprecated_args_accepted(self):
    method test_unknown_args_warned (line 135) | def test_unknown_args_warned(self):
    method test_training_mode (line 140) | def test_training_mode(self):
    method test_couple_channels_parameter (line 156) | def test_couple_channels_parameter(self):
  class TestDropPath (line 183) | class TestDropPath:
    method test_no_drop_when_prob_zero (line 186) | def test_no_drop_when_prob_zero(self):
    method test_no_drop_when_not_training (line 192) | def test_no_drop_when_not_training(self):
    method test_drop_path_scaling (line 198) | def test_drop_path_scaling(self):
    method test_drop_path_no_scaling (line 214) | def test_drop_path_no_scaling(self):
  class TestDropPathModule (line 227) | class TestDropPathModule:
    method test_training_mode (line 230) | def test_training_mode(self):
    method test_extra_repr (line 247) | def test_extra_repr(self):

FILE: tests/test_layers_pool.py
  class TestAdaptiveAvgMaxPool (line 17) | class TestAdaptiveAvgMaxPool:
    method test_adaptive_avgmax_pool2d (line 20) | def test_adaptive_avgmax_pool2d(self):
    method test_select_adaptive_pool2d (line 29) | def test_select_adaptive_pool2d(self):
    method test_adaptive_avgmax_pool2d_module (line 41) | def test_adaptive_avgmax_pool2d_module(self):
    method test_select_adaptive_pool2d_module (line 48) | def test_select_adaptive_pool2d_module(self):
    method test_select_adaptive_pool2d_fast (line 60) | def test_select_adaptive_pool2d_fast(self):
  class TestAttentionPool (line 75) | class TestAttentionPool:
    method test_attention_pool_latent_basic (line 78) | def test_attention_pool_latent_basic(self):
    method test_attention_pool_latent_multi_latent (line 85) | def test_attention_pool_latent_multi_latent(self):
    method test_attention_pool2d_basic (line 97) | def test_attention_pool2d_basic(self):
    method test_attention_pool2d_different_feat_size (line 104) | def test_attention_pool2d_different_feat_size(self):
    method test_rot_attention_pool2d_basic (line 113) | def test_rot_attention_pool2d_basic(self):
    method test_rot_attention_pool2d_different_sizes (line 120) | def test_rot_attention_pool2d_different_sizes(self):
    method test_rot_attention_pool2d_rope_types (line 128) | def test_rot_attention_pool2d_rope_types(self):
    method test_attention_pool2d_out_features (line 152) | def test_attention_pool2d_out_features(
    method test_attention_pool2d_reset (line 178) | def test_attention_pool2d_reset(
    method test_attention_pool2d_pre_logits (line 199) | def test_attention_pool2d_pre_logits(self, pool_cls, base_kwargs, inpu...
    method test_attention_pool2d_qkv_separate (line 211) | def test_attention_pool2d_qkv_separate(self, pool_cls, base_kwargs, in...
    method test_attention_pool2d_class_token (line 223) | def test_attention_pool2d_class_token(self, pool_cls, base_kwargs, inp...
    method test_attention_pool_prr_basic (line 231) | def test_attention_pool_prr_basic(self):
    method test_attention_pool_prr_avg_pool (line 238) | def test_attention_pool_prr_avg_pool(self):
    method test_attention_pool_prr_parameter_free (line 245) | def test_attention_pool_prr_parameter_free(self):
    method test_attention_pool_prr_with_norms (line 251) | def test_attention_pool_prr_with_norms(self):
    method test_attention_pool_latent_out_features (line 274) | def test_attention_pool_latent_out_features(self, out_features, embed_...
  class TestLsePool (line 297) | class TestLsePool:
    method test_lse_plus_2d_basic (line 300) | def test_lse_plus_2d_basic(self):
    method test_lse_plus_2d_no_flatten (line 308) | def test_lse_plus_2d_no_flatten(self):
    method test_lse_plus_1d_basic (line 315) | def test_lse_plus_1d_basic(self):
    method test_lse_high_r_approximates_max (line 322) | def test_lse_high_r_approximates_max(self):
    method test_lse_low_r_approximates_avg (line 330) | def test_lse_low_r_approximates_avg(self):
    method test_lse_learnable_r_gradient (line 338) | def test_lse_learnable_r_gradient(self):
  class TestSimPool (line 350) | class TestSimPool:
    method test_simpool_2d_basic (line 353) | def test_simpool_2d_basic(self):
    method test_simpool_1d_basic (line 360) | def test_simpool_1d_basic(self):
    method test_simpool_multi_head (line 367) | def test_simpool_multi_head(self):
    method test_simpool_with_gamma (line 375) | def test_simpool_with_gamma(self):
    method test_simpool_qk_norm (line 383) | def test_simpool_qk_norm(self):
  class TestPoolingCommon (line 393) | class TestPoolingCommon:
    method test_gradient_flow (line 407) | def test_gradient_flow(self, pool_cls, kwargs, input_shape):
    method test_torchscript (line 426) | def test_torchscript(self, pool_cls, kwargs, input_shape):
    method test_eval_deterministic (line 445) | def test_eval_deterministic(self, pool_cls, kwargs, input_shape):
    method test_different_spatial_sizes (line 460) | def test_different_spatial_sizes(self, pool_cls, kwargs, input_shape):
  class TestBlurPool (line 473) | class TestBlurPool:
    method test_blur_pool_2d_basic (line 476) | def test_blur_pool_2d_basic(self):
    method test_blur_pool_2d_stride (line 483) | def test_blur_pool_2d_stride(self):
  class TestPool1d (line 493) | class TestPool1d:
    method test_global_pool_nlc (line 496) | def test_global_pool_nlc(self):

FILE: tests/test_models.py
  function _get_input_size (line 101) | def _get_input_size(model=None, model_name='', target=None):
  function test_model_inference (line 130) | def test_model_inference(model_name, batch_size):
  function test_model_forward (line 174) | def test_model_forward(model_name, batch_size):
  function test_model_backward (line 207) | def test_model_backward(model_name, batch_size):
  function test_model_default_cfgs (line 257) | def test_model_default_cfgs(model_name, batch_size):
  function test_model_default_cfgs_non_std (line 335) | def test_model_default_cfgs_non_std(model_name, batch_size):
  function test_model_load_pretrained (line 406) | def test_model_load_pretrained(model_name, batch_size):
  function test_model_features_pretrained (line 415) | def test_model_features_pretrained(model_name, batch_size):
  function test_model_forward_torchscript (line 425) | def test_model_forward_torchscript(model_name, batch_size):
  function test_model_forward_features (line 455) | def test_model_forward_features(model_name, batch_size):
  function test_model_forward_intermediates_features (line 486) | def test_model_forward_intermediates_features(model_name, batch_size):
  function test_model_forward_intermediates (line 517) | def test_model_forward_intermediates(model_name, batch_size):
  function _create_fx_model (line 566) | def _create_fx_model(model, train=False):
  function test_model_forward_fx (line 621) | def test_model_forward_fx(model_name, batch_size):
  function test_model_backward_fx (line 656) | def test_model_backward_fx(model_name, batch_size):
  function test_model_forward_fx_torchscript (line 701) | def test_model_forward_fx_torchscript(model_name, batch_size):
  function test_model_forward_torchscript_with_features_fx (line 726) | def test_model_forward_torchscript_with_features_fx(model_name, batch_si...

FILE: tests/test_optim.py
  function _test_basic_cases_template (line 29) | def _test_basic_cases_template(weight, bias, input, constructor, schedul...
  function _test_state_dict (line 63) | def _test_state_dict(weight, bias, input, constructor):
  function _test_basic_cases (line 138) | def _test_basic_cases(constructor, scheduler_constructors=None):
  function _test_model (line 177) | def _test_model(optimizer, params, device=torch.device('cpu'), after_ste...
  function rosenbrock (line 214) | def rosenbrock(tensor):
  function drosenbrock (line 219) | def drosenbrock(tensor):
  function _test_rosenbrock (line 224) | def _test_rosenbrock(constructor, scheduler_constructors=None):
  function _build_params_dict (line 285) | def _build_params_dict(weight, bias, **kwargs):
  function _build_params_dict_single (line 289) | def _build_params_dict_single(weight, bias, **kwargs):
  function test_optim_factory (line 294) | def test_optim_factory(optimizer):
  function test_sgd (line 340) | def test_sgd(optimizer):
  function test_adam (line 382) | def test_adam(optimizer):
  function test_kron (line 390) | def test_kron(optimizer):
  function test_muon (line 398) | def test_muon(optimizer):
  function test_adamuon (line 406) | def test_adamuon(optimizer):
  function test_adopt (line 414) | def test_adopt(optimizer):
  function test_adan (line 422) | def test_adan(optimizer):
  function test_adabelief (line 430) | def test_adabelief(optimizer):
  function test_rectified (line 441) | def test_rectified(optimizer):
  function test_adaother (line 449) | def test_adaother(optimizer):
  function test_adafactor (line 460) | def test_adafactor(optimizer):
  function test_lamb (line 471) | def test_lamb(optimizer):
  function test_laprop (line 479) | def test_laprop(optimizer):
  function test_lars (line 487) | def test_lars(optimizer):
  function test_madgrad (line 495) | def test_madgrad(optimizer):
  function test_mars (line 503) | def test_mars(optimizer):
  function test_novograd (line 511) | def test_novograd(optimizer):
  function test_rmsprop (line 519) | def test_rmsprop(optimizer):
  function test_adamp (line 527) | def test_adamp(optimizer):
  function test_sgdp (line 535) | def test_sgdp(optimizer):
  function test_lookahead_sgd (line 543) | def test_lookahead_sgd(optimizer):
  function test_lookahead_adam (line 550) | def test_lookahead_adam(optimizer):
  function test_lookahead_radam (line 557) | def test_lookahead_radam(optimizer):
  function test_param_groups_layer_decay_with_min (line 563) | def test_param_groups_layer_decay_with_min():
  function test_param_groups_layer_decay_with_matcher (line 586) | def test_param_groups_layer_decay_with_matcher():
  function test_param_groups_weight_decay (line 612) | def test_param_groups_weight_decay():
  function test_cadamp (line 642) | def test_cadamp(optimizer):
  function test_csgdp (line 649) | def test_csgdp(optimizer):
  function test_csgdw (line 656) | def test_csgdw(optimizer):

FILE: tests/test_scheduler.py
  function _create_optimizer (line 21) | def _create_optimizer(lr: float = 0.1, num_groups: int = 1) -> torch.opt...
  class TestSchedulerBasics (line 31) | class TestSchedulerBasics:
    method test_scheduler_init (line 42) | def test_scheduler_init(self, scheduler_cls, kwargs):
    method test_scheduler_step (line 56) | def test_scheduler_step(self, scheduler_cls, kwargs):
    method test_plateau_scheduler_step (line 70) | def test_plateau_scheduler_step(self):
  class TestWarmup (line 80) | class TestWarmup:
    method test_warmup_lr_increases (line 90) | def test_warmup_lr_increases(self, scheduler_cls, kwargs):
    method test_warmup_prefix_reaches_target_lr (line 122) | def test_warmup_prefix_reaches_target_lr(self, scheduler_cls, kwargs):
  class TestCosineScheduler (line 149) | class TestCosineScheduler:
    method test_cosine_decay (line 152) | def test_cosine_decay(self):
    method test_cosine_cycles (line 178) | def test_cosine_cycles(self):
    method test_get_cycle_length (line 204) | def test_get_cycle_length(self):
  class TestStepScheduler (line 223) | class TestStepScheduler:
    method test_step_decay (line 226) | def test_step_decay(self):
  class TestMultiStepScheduler (line 252) | class TestMultiStepScheduler:
    method test_multistep_decay (line 255) | def test_multistep_decay(self):
  class TestPolyScheduler (line 285) | class TestPolyScheduler:
    method test_poly_decay (line 288) | def test_poly_decay(self):
  class TestTanhScheduler (line 311) | class TestTanhScheduler:
    method test_tanh_decay (line 314) | def test_tanh_decay(self):
  class TestStateDict (line 337) | class TestStateDict:
    method test_state_dict_save_load (line 347) | def test_state_dict_save_load(self, scheduler_cls, kwargs):
    method test_plateau_state_dict_save_load (line 368) | def test_plateau_state_dict_save_load(self):
  class TestStepUpdate (line 390) | class TestStepUpdate:
    method test_step_update_with_t_in_epochs_false (line 400) | def test_step_update_with_t_in_epochs_false(self, scheduler_cls, kwargs):
  class TestMultipleParamGroups (line 420) | class TestMultipleParamGroups:
    method test_multiple_param_groups (line 430) | def test_multiple_param_groups(self, scheduler_cls, kwargs):
  class TestNoise (line 447) | class TestNoise:
    method test_noise_range (line 456) | def test_noise_range(self, scheduler_cls, kwargs):
  class TestKDecay (line 494) | class TestKDecay:
    method test_cosine_k_decay (line 497) | def test_cosine_k_decay(self):
    method test_poly_k_decay (line 517) | def test_poly_k_decay(self):

FILE: tests/test_utils.py
  function test_freeze_unfreeze (line 15) | def test_freeze_unfreeze():
  function test_activation_stats_hook_validation (line 66) | def test_activation_stats_hook_validation():
  function test_extract_spp_stats (line 81) | def test_extract_spp_stats():
  function test_freeze_unfreeze_bn_root (line 99) | def test_freeze_unfreeze_bn_root():
  function test_activation_stats_functions (line 116) | def test_activation_stats_functions():
  function test_reparameterize_model (line 135) | def test_reparameterize_model():
  function test_get_state_dict_custom_unwrap (line 164) | def test_get_state_dict_custom_unwrap():
  function test_freeze_unfreeze_string_input (line 182) | def test_freeze_unfreeze_string_input():

FILE: timm/data/auto_augment.py
  function _interpolation (line 53) | def _interpolation(kwargs):
  function _check_args_tf (line 60) | def _check_args_tf(kwargs):
  function shear_x (line 66) | def shear_x(img, factor, **kwargs):
  function shear_y (line 71) | def shear_y(img, factor, **kwargs):
  function translate_x_rel (line 76) | def translate_x_rel(img, pct, **kwargs):
  function translate_y_rel (line 82) | def translate_y_rel(img, pct, **kwargs):
  function translate_x_abs (line 88) | def translate_x_abs(img, pixels, **kwargs):
  function translate_y_abs (line 93) | def translate_y_abs(img, pixels, **kwargs):
  function rotate (line 98) | def rotate(img, degrees, **kwargs):
  function auto_contrast (line 129) | def auto_contrast(img, **__):
  function invert (line 133) | def invert(img, **__):
  function equalize (line 137) | def equalize(img, **__):
  function solarize (line 141) | def solarize(img, thresh, **__):
  function solarize_add (line 145) | def solarize_add(img, add, thresh=128, **__):
  function posterize (line 161) | def posterize(img, bits_to_keep, **__):
  function contrast (line 167) | def contrast(img, factor, **__):
  function color (line 171) | def color(img, factor, **__):
  function brightness (line 175) | def brightness(img, factor, **__):
  function sharpness (line 179) | def sharpness(img, factor, **__):
  function gaussian_blur (line 183) | def gaussian_blur(img, factor, **__):
  function gaussian_blur_rand (line 188) | def gaussian_blur_rand(img, factor, **__):
  function desaturate (line 195) | def desaturate(img, factor, **_):
  function _randomly_negate (line 201) | def _randomly_negate(v):
  function _rotate_level_to_arg (line 206) | def _rotate_level_to_arg(level, _hparams):
  function _enhance_level_to_arg (line 213) | def _enhance_level_to_arg(level, _hparams):
  function _enhance_increasing_level_to_arg (line 218) | def _enhance_increasing_level_to_arg(level, _hparams):
  function _minmax_level_to_arg (line 226) | def _minmax_level_to_arg(level, _hparams, min_val=0., max_val=1.0, clamp...
  function _shear_level_to_arg (line 234) | def _shear_level_to_arg(level, _hparams):
  function _translate_abs_level_to_arg (line 241) | def _translate_abs_level_to_arg(level, hparams):
  function _translate_rel_level_to_arg (line 248) | def _translate_rel_level_to_arg(level, hparams):
  function _posterize_level_to_arg (line 256) | def _posterize_level_to_arg(level, _hparams):
  function _posterize_increasing_level_to_arg (line 263) | def _posterize_increasing_level_to_arg(level, hparams):
  function _posterize_original_level_to_arg (line 270) | def _posterize_original_level_to_arg(level, _hparams):
  function _solarize_level_to_arg (line 277) | def _solarize_level_to_arg(level, _hparams):
  function _solarize_increasing_level_to_arg (line 283) | def _solarize_increasing_level_to_arg(level, _hparams):
  function _solarize_add_level_to_arg (line 289) | def _solarize_add_level_to_arg(level, _hparams):
  class AugmentOp (line 357) | class AugmentOp:
    method __init__ (line 359) | def __init__(self, name, prob=0.5, magnitude=10, hparams=None):
    method __call__ (line 380) | def __call__(self, img):
    method __repr__ (line 398) | def __repr__(self):
  function auto_augment_policy_v0 (line 407) | def auto_augment_policy_v0(hparams):
  function auto_augment_policy_v0r (line 440) | def auto_augment_policy_v0r(hparams):
  function auto_augment_policy_original (line 474) | def auto_augment_policy_original(hparams):
  function auto_augment_policy_originalr (line 507) | def auto_augment_policy_originalr(hparams):
  function auto_augment_policy_3a (line 540) | def auto_augment_policy_3a(hparams):
  function auto_augment_policy (line 550) | def auto_augment_policy(name='v0', hparams=None):
  class AutoAugment (line 565) | class AutoAugment:
    method __init__ (line 567) | def __init__(self, policy):
    method __call__ (line 570) | def __call__(self, img):
    method __repr__ (line 576) | def __repr__(self):
  function auto_augment_transform (line 586) | def auto_augment_transform(config_str: str, hparams: Optional[Dict] = No...
  function _get_weighted_transforms (line 707) | def _get_weighted_transforms(transforms: Dict):
  function rand_augment_choices (line 714) | def rand_augment_choices(name: str, increasing=True):
  function rand_augment_ops (line 724) | def rand_augment_ops(
  class RandAugment (line 736) | class RandAugment:
    method __init__ (line 737) | def __init__(self, ops, num_layers=2, choice_weights=None):
    method __call__ (line 742) | def __call__(self, img):
    method __repr__ (line 754) | def __repr__(self):
  function rand_augment_transform (line 762) | def rand_augment_transform(
  function augmix_ops (line 863) | def augmix_ops(
  class AugMixAugment (line 878) | class AugMixAugment:
    method __init__ (line 884) | def __init__(self, ops, alpha=1., width=3, depth=-1, blended=False):
    method _calc_blended_weights (line 891) | def _calc_blended_weights(self, ws, m):
    method _apply_blended (line 901) | def _apply_blended(self, img, mixing_weights, m):
    method _apply_basic (line 917) | def _apply_basic(self, img, mixing_weights, m):
    method __call__ (line 934) | def __call__(self, img):
    method __repr__ (line 943) | def __repr__(self):
  function augment_and_mix_transform (line 951) | def augment_and_mix_transform(config_str: str, hparams: Optional[Dict] =...

FILE: timm/data/config.py
  function resolve_data_config (line 8) | def resolve_data_config(
  function resolve_model_data_config (line 103) | def resolve_model_data_config(

FILE: timm/data/dataset.py
  class ImageDataset (line 21) | class ImageDataset(data.Dataset):
    method __init__ (line 23) | def __init__(
    method __getitem__ (line 53) | def __getitem__(self, index):
    method __len__ (line 80) | def __len__(self):
    method filename (line 83) | def filename(self, index, basename=False, absolute=False):
    method filenames (line 86) | def filenames(self, basename=False, absolute=False):
  class IterableImageDataset (line 90) | class IterableImageDataset(data.IterableDataset):
    method __init__ (line 92) | def __init__(
    method __iter__ (line 136) | def __iter__(self):
    method __len__ (line 144) | def __len__(self):
    method set_epoch (line 150) | def set_epoch(self, count):
    method set_loader_cfg (line 155) | def set_loader_cfg(
    method filename (line 163) | def filename(self, index, basename=False, absolute=False):
    method filenames (line 166) | def filenames(self, basename=False, absolute=False):
  class AugMixDataset (line 170) | class AugMixDataset(torch.utils.data.Dataset):
    method __init__ (line 173) | def __init__(self, dataset, num_splits=2):
    method _set_transforms (line 181) | def _set_transforms(self, x):
    method transform (line 188) | def transform(self):
    method transform (line 192) | def transform(self, x):
    method _normalize (line 195) | def _normalize(self, x):
    method __getitem__ (line 198) | def __getitem__(self, i):
    method __len__ (line 206) | def __len__(self):

FILE: timm/data/dataset_factory.py
  function _search_split (line 43) | def _search_split(root, split):
  function create_dataset (line 63) | def create_dataset(

FILE: timm/data/dataset_info.py
  class DatasetInfo (line 5) | class DatasetInfo(ABC):
    method __init__ (line 7) | def __init__(self):
    method num_classes (line 11) | def num_classes(self):
    method label_names (line 15) | def label_names(self):
    method label_descriptions (line 19) | def label_descriptions(self, detailed: bool = False, as_dict: bool = F...
    method index_to_label_name (line 23) | def index_to_label_name(self, index) -> str:
    method index_to_description (line 27) | def index_to_description(self, index: int, detailed: bool = False) -> ...
    method label_name_to_description (line 31) | def label_name_to_description(self, label: str, detailed: bool = False...
  class CustomDatasetInfo (line 35) | class CustomDatasetInfo(DatasetInfo):
    method __init__ (line 38) | def __init__(
    method num_classes (line 53) | def num_classes(self):
    method label_names (line 56) | def label_names(self):
    method label_descriptions (line 59) | def label_descriptions(self, detailed: bool = False, as_dict: bool = F...
    method label_name_to_description (line 62) | def label_name_to_description(self, label: str, detailed: bool = False...
    method index_to_label_name (line 67) | def index_to_label_name(self, index) -> str:
    method index_to_description (line 71) | def index_to_description(self, index: int, detailed: bool = False) -> ...

FILE: timm/data/distributed_sampler.py
  class OrderedDistributedSampler (line 7) | class OrderedDistributedSampler(Sampler):
    method __init__ (line 22) | def __init__(self, dataset, num_replicas=None, rank=None):
    method __iter__ (line 37) | def __iter__(self):
    method __len__ (line 50) | def __len__(self):
  class RepeatAugSampler (line 54) | class RepeatAugSampler(Sampler):
    method __init__ (line 65) | def __init__(
    method __iter__ (line 101) | def __iter__(self):
    method __len__ (line 131) | def __len__(self):
    method set_epoch (line 134) | def set_epoch(self, epoch):

FILE: timm/data/imagenet_info.py
  function infer_imagenet_subset (line 33) | def infer_imagenet_subset(model_or_cfg) -> Optional[str]:
  class ImageNetInfo (line 48) | class ImageNetInfo(DatasetInfo):
    method __init__ (line 50) | def __init__(self, subset: str = 'imagenet-1k'):
    method num_classes (line 69) | def num_classes(self):
    method label_names (line 72) | def label_names(self):
    method label_descriptions (line 75) | def label_descriptions(self, detailed: bool = False, as_dict: bool = F...
    method index_to_label_name (line 81) | def index_to_label_name(self, index) -> str:
    method index_to_description (line 86) | def index_to_description(self, index: int, detailed: bool = False) -> ...
    method label_name_to_description (line 90) | def label_name_to_description(self, label: str, detailed: bool = False...

FILE: timm/data/loader.py
  function fast_collate (line 29) | def fast_collate(batch):
  function adapt_to_chs (line 68) | def adapt_to_chs(x, n):
  class PrefetchLoader (line 80) | class PrefetchLoader:
    method __init__ (line 82) | def __init__(
    method __iter__ (line 123) | def __iter__(self):
    method __len__ (line 160) | def __len__(self):
    method sampler (line 164) | def sampler(self):
    method dataset (line 168) | def dataset(self):
    method mixup_enabled (line 172) | def mixup_enabled(self):
    method mixup_enabled (line 179) | def mixup_enabled(self, x):
  function _worker_init (line 184) | def _worker_init(worker_id, worker_seeding='all'):
  function create_loader (line 200) | def create_loader(
  class MultiEpochsDataLoader (line 381) | class MultiEpochsDataLoader(torch.utils.data.DataLoader):
    method __init__ (line 383) | def __init__(self, *args, **kwargs):
    method __len__ (line 393) | def __len__(self):
    method __iter__ (line 396) | def __iter__(self):
  class _RepeatSampler (line 401) | class _RepeatSampler(object):
    method __init__ (line 408) | def __init__(self, sampler):
    method __iter__ (line 411) | def __iter__(self):

FILE: timm/data/mixup.py
  function one_hot (line 17) | def one_hot(x, num_classes, on_value=1., off_value=0.):
  function mixup_target (line 22) | def mixup_target(target, num_classes, lam=1., smoothing=0.0):
  function rand_bbox (line 30) | def rand_bbox(img_shape, lam, margin=0., count=None):
  function rand_bbox_minmax (line 54) | def rand_bbox_minmax(img_shape, minmax, count=None):
  function cutmix_bbox_and_lam (line 77) | def cutmix_bbox_and_lam(img_shape, lam, ratio_minmax=None, correct_lam=T...
  class Mixup (line 90) | class Mixup:
    method __init__ (line 104) | def __init__(self, mixup_alpha=1., cutmix_alpha=0., cutmix_minmax=None...
    method _params_per_elem (line 121) | def _params_per_elem(self, batch_size):
    method _params_per_batch (line 141) | def _params_per_batch(self):
    method _mix_elem (line 159) | def _mix_elem(self, x):
    method _mix_pair (line 176) | def _mix_pair(self, x):
    method _mix_batch (line 196) | def _mix_batch(self, x):
    method __call__ (line 209) | def __call__(self, x, target):
  class FastCollateMixup (line 221) | class FastCollateMixup(Mixup):
    method _mix_elem_collate (line 227) | def _mix_elem_collate(self, output, batch, half=False):
    method _mix_pair_collate (line 262) | def _mix_pair_collate(self, output, batch):
    method _mix_batch_collate (line 303) | def _mix_batch_collate(self, output, batch):
    method __call__ (line 332) | def __call__(self, batch, _=None):

FILE: timm/data/naflex_dataset.py
  function calculate_naflex_batch_size (line 30) | def calculate_naflex_batch_size(
  class NaFlexCollator (line 73) | class NaFlexCollator:
    method __init__ (line 76) | def __init__(
    method __call__ (line 87) | def __call__(self, batch: List[Tuple[Dict[str, torch.Tensor], Union[in...
  function _resolve_patch_cfg (line 154) | def _resolve_patch_cfg(
  class NaFlexMapDatasetWrapper (line 198) | class NaFlexMapDatasetWrapper(IterableDataset):
    method __init__ (line 210) | def __init__(
    method _create_canonical_schedule (line 305) | def _create_canonical_schedule(self):
    method _prepare_epoch_batches (line 391) | def _prepare_epoch_batches(self, epoch: int):
    method set_epoch (line 477) | def set_epoch(self, epoch: int) -> None:
    method __len__ (line 488) | def __len__(self) -> int:
    method __iter__ (line 496) | def __iter__(self) -> Iterator[Tuple[Dict[str, torch.Tensor], torch.Te...

FILE: timm/data/naflex_loader.py
  class NaFlexPrefetchLoader (line 27) | class NaFlexPrefetchLoader:
    method __init__ (line 30) | def __init__(
    method __iter__ (line 86) | def __iter__(self) -> Iterator[Tuple[Dict[str, torch.Tensor], torch.Te...
    method __len__ (line 164) | def __len__(self) -> int:
    method sampler (line 173) | def sampler(self):
    method dataset (line 182) | def dataset(self):
  function create_naflex_loader (line 191) | def create_naflex_loader(

FILE: timm/data/naflex_mixup.py
  function mix_batch_variable_size (line 23) | def mix_batch_variable_size(
  function smoothed_sparse_target (line 132) | def smoothed_sparse_target(
  function pairwise_mixup_target (line 151) | def pairwise_mixup_target(
  class NaFlexMixup (line 180) | class NaFlexMixup:
    method __init__ (line 183) | def __init__(
    method __call__ (line 213) | def __call__(

FILE: timm/data/naflex_random_erasing.py
  class PatchRandomErasing (line 21) | class PatchRandomErasing:
    method __init__ (line 29) | def __init__(
    method _get_values (line 87) | def _get_values(
    method _drop_patches (line 122) | def _drop_patches(
    method _erase_patches (line 163) | def _erase_patches(
    method _erase_region (line 209) | def _erase_region(
    method __call__ (line 282) | def __call__(
    method __repr__ (line 345) | def __repr__(self) -> str:

FILE: timm/data/naflex_transforms.py
  function get_image_size_for_seq (line 26) | def get_image_size_for_seq(
  class ResizeToSequence (line 126) | class ResizeToSequence(torch.nn.Module):
    method __init__ (line 132) | def __init__(
    method forward (line 163) | def forward(self, img: torch.Tensor) -> torch.Tensor:
  class ResizeKeepRatioToSequence (line 192) | class ResizeKeepRatioToSequence(torch.nn.Module):
    method __init__ (line 197) | def __init__(
    method get_params (line 244) | def get_params(
    method forward (line 331) | def forward(self, img):
    method __repr__ (line 356) | def __repr__(self):
  class CenterCropToSequence (line 365) | class CenterCropToSequence(torch.nn.Module):
    method __init__ (line 367) | def __init__(
    method forward (line 383) | def forward(self, img):
  class RandomCropToSequence (line 397) | class RandomCropToSequence(torch.nn.Module):
    method __init__ (line 405) | def __init__(
    method get_params (line 429) | def get_params(img, target_size):
    method forward (line 448) | def forward(self, img):
    method __repr__ (line 477) | def __repr__(self) -> str:
  function _validate_range (line 483) | def _validate_range(value, name, length=2):
  class RandomResizedCropToSequence (line 496) | class RandomResizedCropToSequence(torch.nn.Module):
    method __init__ (line 539) | def __init__(
    method get_params (line 587) | def get_params(
    method forward (line 703) | def forward(self, img: torch.Tensor) -> torch.Tensor:
    method __repr__ (line 732) | def __repr__(self) -> str:
  function patchify_image (line 751) | def patchify_image(
  class Patchify (line 787) | class Patchify(torch.nn.Module):
    method __init__ (line 790) | def __init__(
    method forward (line 799) | def forward(self, img):

FILE: timm/data/random_erasing.py
  function _get_pixels (line 14) | def _get_pixels(per_pixel, rand_color, patch_size, dtype=torch.float32, ...
  class RandomErasing (line 26) | class RandomErasing:
    method __init__ (line 46) | def __init__(
    method _erase (line 78) | def _erase(self, img, chan, img_h, img_w, dtype):
    method __call__ (line 102) | def __call__(self, input):
    method __repr__ (line 113) | def __repr__(self):

FILE: timm/data/readers/class_map.py
  function load_class_map (line 5) | def load_class_map(map_or_filename, root=''):

FILE: timm/data/readers/img_extensions.py
  function _set_extensions (line 10) | def _set_extensions(extensions):
  function _valid_extension (line 18) | def _valid_extension(x: str):
  function is_img_extension (line 22) | def is_img_extension(ext):
  function get_img_extensions (line 26) | def get_img_extensions(as_set=False):
  function set_img_extensions (line 30) | def set_img_extensions(extensions):
  function add_img_extensions (line 37) | def add_img_extensions(ext):
  function del_img_extensions (line 46) | def del_img_extensions(ext):

FILE: timm/data/readers/reader.py
  class Reader (line 4) | class Reader:
    method __init__ (line 5) | def __init__(self):
    method _filename (line 9) | def _filename(self, index, basename=False, absolute=False):
    method filename (line 12) | def filename(self, index, basename=False, absolute=False):
    method filenames (line 15) | def filenames(self, basename=False, absolute=False):

FILE: timm/data/readers/reader_factory.py
  function create_reader (line 8) | def create_reader(

FILE: timm/data/readers/reader_hfds.py
  function get_class_labels (line 22) | def get_class_labels(info, label_key='label'):
  class ReaderHfds (line 30) | class ReaderHfds(Reader):
    method __init__ (line 32) | def __init__(
    method __getitem__ (line 77) | def __getitem__(self, index):
    method __len__ (line 97) | def __len__(self):
    method _filename (line 100) | def _filename(self, index, basename=False, absolute=False):

FILE: timm/data/readers/reader_hfids.py
  class ReaderHfids (line 29) | class ReaderHfids(Reader):
    method __init__ (line 30) | def __init__(
    method set_epoch (line 112) | def set_epoch(self, count):
    method set_loader_cfg (line 116) | def set_loader_cfg(
    method _lazy_init (line 126) | def _lazy_init(self):
    method _num_samples_per_worker (line 159) | def _num_samples_per_worker(self):
    method __iter__ (line 168) | def __iter__(self):
    method __len__ (line 196) | def __len__(self):
    method _filename (line 200) | def _filename(self, index, basename=False, absolute=False):
    method filenames (line 203) | def filenames(self, basename=False, absolute=False):

FILE: timm/data/readers/reader_image_folder.py
  function find_images_and_targets (line 18) | def find_images_and_targets(
  class ReaderImageFolder (line 59) | class ReaderImageFolder(Reader):
    method __init__ (line 61) | def __init__(
    method __getitem__ (line 86) | def __getitem__(self, index):
    method __len__ (line 90) | def __len__(self):
    method _filename (line 93) | def _filename(self, index, basename=False, absolute=False):

FILE: timm/data/readers/reader_image_in_tar.py
  class TarState (line 31) | class TarState:
    method __init__ (line 33) | def __init__(self, tf: tarfile.TarFile = None, ti: tarfile.TarInfo = N...
    method reset (line 38) | def reset(self):
  function _extract_tarinfo (line 42) | def _extract_tarinfo(tf: tarfile.TarFile, parent_info: Dict, extensions:...
  function extract_tarinfos (line 63) | def extract_tarinfos(
  class ReaderImageInTar (line 172) | class ReaderImageInTar(Reader):
    method __init__ (line 176) | def __init__(self, root, class_map='', cache_tarfiles=True, cache_tari...
    method __len__ (line 197) | def __len__(self):
    method __getitem__ (line 200) | def __getitem__(self, index):
    method _filename (line 225) | def _filename(self, index, basename=False, absolute=False):

FILE: timm/data/readers/reader_image_tar.py
  function extract_tarinfo (line 18) | def extract_tarinfo(tarfile, class_to_idx=None, sort=True):
  class ReaderImageTar (line 41) | class ReaderImageTar(Reader):
    method __init__ (line 46) | def __init__(self, root, class_map=''):
    method __getitem__ (line 60) | def __getitem__(self, index):
    method __len__ (line 67) | def __len__(self):
    method _filename (line 70) | def _filename(self, index, basename=False, absolute=False):

FILE: timm/data/readers/reader_tfds.py
  function decode_example (line 49) | def decode_example(serialized_image, feature, dct_method='INTEGER_ACCURA...
  function even_split_indices (line 57) | def even_split_indices(split, n, num_samples):
  function get_class_labels (line 62) | def get_class_labels(info):
  class ReaderTfds (line 70) | class ReaderTfds(Reader):
    method __init__ (line 94) | def __init__(
    method set_epoch (line 183) | def set_epoch(self, count):
    method set_loader_cfg (line 186) | def set_loader_cfg(
    method _lazy_init (line 196) | def _lazy_init(self):
    method _num_samples_per_worker (line 278) | def _num_samples_per_worker(self):
    method __iter__ (line 287) | def __iter__(self):
    method __len__ (line 331) | def __len__(self):
    method _filename (line 335) | def _filename(self, index, basename=False, absolute=False):
    method filenames (line 338) | def filenames(self, basename=False, absolute=False):

FILE: timm/data/readers/reader_wds.py
  function _load_info (line 42) | def _load_info(root, names=('_info.json', 'info.json')):
  class SplitInfo (line 67) | class SplitInfo:
  function _parse_split_info (line 75) | def _parse_split_info(split: str, info: Dict):
  function log_and_continue (line 126) | def log_and_continue(exn):
  function _decode (line 135) | def _decode(
  function pytorch_worker_seed (line 171) | def pytorch_worker_seed():
  class detshuffle2 (line 184) | class detshuffle2(wds.PipelineStage):
    method __init__ (line 185) | def __init__(
    method run (line 197) | def run(self, src):
  class ResampledShards2 (line 218) | class ResampledShards2(IterableDataset):
    method __init__ (line 221) | def __init__(
    method __iter__ (line 243) | def __iter__(self):
  class ReaderWds (line 262) | class ReaderWds(Reader):
    method __init__ (line 263) | def __init__(
    method set_epoch (line 337) | def set_epoch(self, count):
    method set_loader_cfg (line 340) | def set_loader_cfg(
    method _lazy_init (line 350) | def _lazy_init(self):
    method _split_by_node_and_worker (line 403) | def _split_by_node_and_worker(self, src):
    method _num_samples_per_worker (line 411) | def _num_samples_per_worker(self):
    method __iter__ (line 419) | def __iter__(self):
    method __len__ (line 443) | def __len__(self):
    method _filename (line 447) | def _filename(self, index, basename=False, absolute=False):
    method filenames (line 450) | def filenames(self, basename=False, absolute=False):

FILE: timm/data/readers/shared_count.py
  class SharedCount (line 4) | class SharedCount:
    method __init__ (line 5) | def __init__(self, epoch: int = 0):
    method value (line 9) | def value(self):
    method value (line 13) | def value(self, epoch):

FILE: timm/data/real_labels.py
  class RealLabelsImagenet (line 13) | class RealLabelsImagenet:
    method __init__ (line 15) | def __init__(self, filenames, real_json=None, topk=(1, 5)):
    method add_result (line 30) | def add_result(self, output):
    method get_accuracy (line 43) | def get_accuracy(self, k=None):

FILE: timm/data/tf_preprocessing.py
  function distorted_bounding_box_crop (line 33) | def distorted_bounding_box_crop(image_bytes,
  function _at_least_x_are_equal (line 85) | def _at_least_x_are_equal(a, b, x):
  function _decode_and_random_crop (line 92) | def _decode_and_random_crop(image_bytes, image_size, resize_method):
  function _decode_and_center_crop (line 114) | def _decode_and_center_crop(image_bytes, image_size, resize_method):
  function _flip (line 135) | def _flip(image):
  function preprocess_for_train (line 141) | def preprocess_for_train(image_bytes, use_bfloat16, image_size=IMAGE_SIZ...
  function preprocess_for_eval (line 162) | def preprocess_for_eval(image_bytes, use_bfloat16, image_size=IMAGE_SIZE...
  function preprocess_image (line 182) | def preprocess_image(image_bytes,
  class TfPreprocessTransform (line 205) | class TfPreprocessTransform:
    method __init__ (line 207) | def __init__(self, is_training=False, size=224, interpolation='bicubic'):
    method _build_tf_graph (line 215) | def _build_tf_graph(self):
    method __call__ (line 225) | def __call__(self, image_bytes):

FILE: timm/data/transforms.py
  class ToNumpy (line 25) | class ToNumpy:
    method __call__ (line 27) | def __call__(self, pil_img):
  class ToTensor (line 35) | class ToTensor:
    method __init__ (line 37) | def __init__(self, dtype=torch.float32):
    method __call__ (line 40) | def __call__(self, pil_img):
  class MaybeToTensor (line 44) | class MaybeToTensor(transforms.ToTensor):
    method __init__ (line 48) | def __init__(self) -> None:
    method __call__ (line 51) | def __call__(self, pic) -> torch.Tensor:
    method __repr__ (line 63) | def __repr__(self) -> str:
  class MaybePILToTensor (line 67) | class MaybePILToTensor:
    method __init__ (line 71) | def __init__(self) -> None:
    method __call__ (line 74) | def __call__(self, pic):
    method __repr__ (line 88) | def __repr__(self) -> str:
  function str_to_pil_interp (line 132) | def str_to_pil_interp(mode_str):
  function str_to_interp_mode (line 136) | def str_to_interp_mode(mode_str):
  function interp_mode_to_str (line 143) | def interp_mode_to_str(mode):
  function _setup_size (line 153) | def _setup_size(size, error_msg="Please provide only two dimensions (h, ...
  class RandomResizedCropAndInterpolation (line 166) | class RandomResizedCropAndInterpolation:
    method __init__ (line 181) | def __init__(
    method get_params (line 203) | def get_params(img, scale, ratio):
    method __call__ (line 245) | def __call__(self, img):
    method __repr__ (line 260) | def __repr__(self):
  function center_crop_or_pad (line 272) | def center_crop_or_pad(
  class CenterCropOrPad (line 314) | class CenterCropOrPad(torch.nn.Module):
    method __init__ (line 326) | def __init__(
    method forward (line 337) | def forward(self, img):
    method __repr__ (line 347) | def __repr__(self) -> str:
  function crop_or_pad (line 351) | def crop_or_pad(
  class RandomCropOrPad (line 379) | class RandomCropOrPad(torch.nn.Module):
    method __init__ (line 383) | def __init__(
    method get_params (line 395) | def get_params(img, size):
    method forward (line 403) | def forward(self, img):
    method __repr__ (line 422) | def __repr__(self) -> str:
  class RandomPad (line 426) | class RandomPad:
    method __init__ (line 427) | def __init__(self, input_size, fill=0):
    method get_params (line 432) | def get_params(img, input_size):
    method __call__ (line 442) | def __call__(self, img):
  class ResizeKeepRatio (line 448) | class ResizeKeepRatio:
    method __init__ (line 452) | def __init__(
    method get_params (line 491) | def get_params(
    method __call__ (line 530) | def __call__(self, img):
    method __repr__ (line 550) | def __repr__(self):
  class TrimBorder (line 567) | class TrimBorder(torch.nn.Module):
    method __init__ (line 569) | def __init__(
    method forward (line 576) | def forward(self, img):

FILE: timm/data/transforms_factory.py
  function transforms_noaug_train (line 20) | def transforms_noaug_train(
  function transforms_imagenet_train (line 65) | def transforms_imagenet_train(
  function transforms_imagenet_eval (line 272) | def transforms_imagenet_eval(
  function create_transform (line 377) | def create_transform(

FILE: timm/layers/_fx.py
  function register_notrace_module (line 30) | def register_notrace_module(module: Type[nn.Module]):
  function is_notrace_module (line 38) | def is_notrace_module(module: Type[nn.Module]):
  function get_notrace_modules (line 42) | def get_notrace_modules():
  function register_notrace_function (line 50) | def register_notrace_function(name_or_fn):
  function is_notrace_function (line 55) | def is_notrace_function(func: Callable):
  function get_notrace_functions (line 59) | def get_notrace_functions():
  function get_graph_node_names (line 63) | def get_graph_node_names(model: nn.Module) -> Tuple[List[str], List[str]]:
  function create_feature_extractor (line 73) | def create_feature_extractor(model: nn.Module, return_nodes: Union[Dict[...

FILE: timm/layers/activations.py
  function swish (line 14) | def swish(x, inplace: bool = False):
  class Swish (line 20) | class Swish(nn.Module):
    method __init__ (line 21) | def __init__(self, inplace: bool = False):
    method forward (line 25) | def forward(self, x):
  function mish (line 29) | def mish(x, inplace: bool = False):
  class Mish (line 36) | class Mish(nn.Module):
    method __init__ (line 39) | def __init__(self, inplace: bool = False):
    method forward (line 42) | def forward(self, x):
  function sigmoid (line 46) | def sigmoid(x, inplace: bool = False):
  class Sigmoid (line 51) | class Sigmoid(nn.Module):
    method __init__ (line 52) | def __init__(self, inplace: bool = False):
    method forward (line 56) | def forward(self, x):
  function tanh (line 60) | def tanh(x, inplace: bool = False):
  class Tanh (line 65) | class Tanh(nn.Module):
    method __init__ (line 66) | def __init__(self, inplace: bool = False):
    method forward (line 70) | def forward(self, x):
  function hard_swish (line 74) | def hard_swish(x, inplace: bool = False):
  class HardSwish (line 79) | class HardSwish(nn.Module):
    method __init__ (line 80) | def __init__(self, inplace: bool = False):
    method forward (line 84) | def forward(self, x):
  function hard_sigmoid (line 88) | def hard_sigmoid(x, inplace: bool = False):
  class HardSigmoid (line 95) | class HardSigmoid(nn.Module):
    method __init__ (line 96) | def __init__(self, inplace: bool = False):
    method forward (line 100) | def forward(self, x):
  function hard_mish (line 104) | def hard_mish(x, inplace: bool = False):
  class HardMish (line 115) | class HardMish(nn.Module):
    method __init__ (line 116) | def __init__(self, inplace: bool = False):
    method forward (line 120) | def forward(self, x):
  class PReLU (line 124) | class PReLU(nn.PReLU):
    method __init__ (line 127) | def __init__(self, num_parameters: int = 1, init: float = 0.25, inplac...
    method forward (line 130) | def forward(self, input: torch.Tensor) -> torch.Tensor:
  function gelu (line 134) | def gelu(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
  class GELU (line 138) | class GELU(nn.Module):
    method __init__ (line 141) | def __init__(self, inplace: bool = False):
    method forward (line 144) | def forward(self, input: torch.Tensor) -> torch.Tensor:
  function gelu_tanh (line 148) | def gelu_tanh(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
  class GELUTanh (line 152) | class GELUTanh(nn.Module):
    method __init__ (line 155) | def __init__(self, inplace: bool = False):
    method forward (line 158) | def forward(self, input: torch.Tensor) -> torch.Tensor:
  function quick_gelu (line 162) | def quick_gelu(x: torch.Tensor, inplace: bool = False) -> torch.Tensor:
  class QuickGELU (line 166) | class QuickGELU(nn.Module):
    method __init__ (line 169) | def __init__(self, inplace: bool = False):
    method forward (line 172) | def forward(self, input: torch.Tensor) -> torch.Tensor:

FILE: timm/layers/activations_me.py
  function swish_fwd (line 17) | def swish_fwd(x):
  function swish_bwd (line 21) | def swish_bwd(x, grad_output):
  class SwishAutoFn (line 26) | class SwishAutoFn(torch.autograd.Function):
    method symbolic (line 32) | def symbolic(g, x):
    method forward (line 36) | def forward(ctx, x):
    method backward (line 41) | def backward(ctx, grad_output):
  function swish_me (line 46) | def swish_me(x, inplace=False):
  class SwishMe (line 50) | class SwishMe(nn.Module):
    method __init__ (line 51) | def __init__(self, inplace: bool = False):
    method forward (line 54) | def forward(self, x):
  function mish_fwd (line 58) | def mish_fwd(x):
  function mish_bwd (line 62) | def mish_bwd(x, grad_output):
  class MishAutoFn (line 68) | class MishAutoFn(torch.autograd.Function):
    method forward (line 73) | def forward(ctx, x):
    method backward (line 78) | def backward(ctx, grad_output):
  function mish_me (line 83) | def mish_me(x, inplace=False):
  class MishMe (line 87) | class MishMe(nn.Module):
    method __init__ (line 88) | def __init__(self, inplace: bool = False):
    method forward (line 91) | def forward(self, x):
  function hard_sigmoid_fwd (line 95) | def hard_sigmoid_fwd(x, inplace: bool = False):
  function hard_sigmoid_bwd (line 99) | def hard_sigmoid_bwd(x, grad_output):
  class HardSigmoidAutoFn (line 104) | class HardSigmoidAutoFn(torch.autograd.Function):
    method forward (line 106) | def forward(ctx, x):
    method backward (line 111) | def backward(ctx, grad_output):
  function hard_sigmoid_me (line 116) | def hard_sigmoid_me(x, inplace: bool = False):
  class HardSigmoidMe (line 120) | class HardSigmoidMe(nn.Module):
    method __init__ (line 121) | def __init__(self, inplace: bool = False):
    method forward (line 124) | def forward(self, x):
  function hard_swish_fwd (line 128) | def hard_swish_fwd(x):
  function hard_swish_bwd (line 132) | def hard_swish_bwd(x, grad_output):
  class HardSwishAutoFn (line 138) | class HardSwishAutoFn(torch.autograd.Function):
    method forward (line 141) | def forward(ctx, x):
    method backward (line 146) | def backward(ctx, grad_output):
    method symbolic (line 151) | def symbolic(g, self):
  function hard_swish_me (line 158) | def hard_swish_me(x, inplace=False):
  class HardSwishMe (line 162) | class HardSwishMe(nn.Module):
    method __init__ (line 163) | def __init__(self, inplace: bool = False):
    method forward (line 166) | def forward(self, x):
  function hard_mish_fwd (line 170) | def hard_mish_fwd(x):
  function hard_mish_bwd (line 174) | def hard_mish_bwd(x, grad_output):
  class HardMishAutoFn (line 180) | class HardMishAutoFn(torch.autograd.Function):
    method forward (line 186) | def forward(ctx, x):
    method backward (line 191) | def backward(ctx, grad_output):
  function hard_mish_me (line 196) | def hard_mish_me(x, inplace: bool = False):
  class HardMishMe (line 200) | class HardMishMe(nn.Module):
    method __init__ (line 201) | def __init__(self, inplace: bool = False):
    method forward (line 204) | def forward(self, x):

FILE: timm/layers/adaptive_avgmax_pool.py
  function adaptive_pool_feat_mult (line 23) | def adaptive_pool_feat_mult(pool_type='avg'):
  function adaptive_avgmax_pool2d (line 30) | def adaptive_avgmax_pool2d(x, output_size: _int_tuple_2_t = 1):
  function adaptive_catavgmax_pool2d (line 36) | def adaptive_catavgmax_pool2d(x, output_size: _int_tuple_2_t = 1):
  function select_adaptive_pool2d (line 42) | def select_adaptive_pool2d(x, pool_type='avg', output_size: _int_tuple_2...
  class FastAdaptiveAvgPool (line 58) | class FastAdaptiveAvgPool(nn.Module):
    method __init__ (line 59) | def __init__(self, flatten: bool = False, input_fmt: F = 'NCHW'):
    method forward (line 64) | def forward(self, x):
  class FastAdaptiveMaxPool (line 68) | class FastAdaptiveMaxPool(nn.Module):
    method __init__ (line 69) | def __init__(self, flatten: bool = False, input_fmt: str = 'NCHW'):
    method forward (line 74) | def forward(self, x):
  class FastAdaptiveAvgMaxPool (line 78) | class FastAdaptiveAvgMaxPool(nn.Module):
    method __init__ (line 79) | def __init__(self, flatten: bool = False, input_fmt: str = 'NCHW'):
    method forward (line 84) | def forward(self, x):
  class FastAdaptiveCatAvgMaxPool (line 90) | class FastAdaptiveCatAvgMaxPool(nn.Module):
    method __init__ (line 91) | def __init__(self, flatten: bool = False, input_fmt: str = 'NCHW'):
    method forward (line 100) | def forward(self, x):
  class AdaptiveAvgMaxPool2d (line 106) | class AdaptiveAvgMaxPool2d(nn.Module):
    method __init__ (line 107) | def __init__(self, output_size: _int_tuple_2_t = 1):
    method forward (line 111) | def forward(self, x):
  class AdaptiveCatAvgMaxPool2d (line 115) | class AdaptiveCatAvgMaxPool2d(nn.Module):
    method __init__ (line 116) | def __init__(self, output_size: _int_tuple_2_t = 1):
    method forward (line 120) | def forward(self, x):
  class SelectAdaptivePool2d (line 124) | class SelectAdaptivePool2d(nn.Module):
    method __init__ (line 127) | def __init__(
    method is_identity (line 168) | def is_identity(self):
    method forward (line 171) | def forward(self, x):
    method feat_mult (line 176) | def feat_mult(self):
    method __repr__ (line 179) | def __repr__(self):

FILE: timm/layers/attention.py
  function maybe_add_mask (line 17) | def maybe_add_mask(scores: torch.Tensor, attn_mask: Optional[torch.Tenso...
  function resolve_self_attn_mask (line 23) | def resolve_self_attn_mask(
  class Attention (line 43) | class Attention(nn.Module):
    method __init__ (line 53) | def __init__(
    method forward (line 108) | def forward(
  class AttentionRope (line 142) | class AttentionRope(nn.Module):
    method __init__ (line 152) | def __init__(
    method forward (line 224) | def forward(

FILE: timm/layers/attention2d.py
  class MultiQueryAttentionV2 (line 13) | class MultiQueryAttentionV2(nn.Module):
    method __init__ (line 27) | def __init__(
    method reset_parameters (line 57) | def reset_parameters(self):
    method _reshape_input (line 64) | def _reshape_input(self, t):
    method forward (line 72) | def forward(self, x, m: Optional[torch.Tensor] = None):
  class MultiQueryAttention2d (line 94) | class MultiQueryAttention2d(nn.Module):
    method __init__ (line 107) | def __init__(
    method init_weights (line 234) | def init_weights(self):
    method _reshape_input (line 244) | def _reshape_input(self, t: torch.Tensor):
    method _reshape_projected_query (line 253) | def _reshape_projected_query(self, t: torch.Tensor, num_heads: int, ke...
    method _reshape_output (line 262) | def _reshape_output(self, t: torch.Tensor, num_heads: int, h_px: int, ...
    method forward (line 270) | def forward(self, x, attn_mask: Optional[torch.Tensor] = None):
  class Attention2d (line 320) | class Attention2d(nn.Module):
    method __init__ (line 324) | def __init__(
    method forward (line 351) | def forward(self, x, attn_mask: Optional[torch.Tensor] = None):

FILE: timm/layers/attention_pool.py
  class AttentionPoolLatent (line 13) | class AttentionPoolLatent(nn.Module):
    method __init__ (line 20) | def __init__(
    method init_weights (line 89) | def init_weights(self):
    method forward (line 94) | def forward(self, x, attn_mask: Optional[torch.Tensor] = None):
  class AttentionPoolPrr (line 132) | class AttentionPoolPrr(nn.Module):
    method __init__ (line 143) | def __init__(
    method forward (line 172) | def forward(self, x: torch.Tensor) -> torch.Tensor:

FILE: timm/layers/attention_pool2d.py
  class RotAttentionPool2d (line 22) | class RotAttentionPool2d(nn.Module):
    method __init__ (line 36) | def __init__(
    method init_weights (line 103) | def init_weights(self, zero_init_last: bool = False):
    method reset (line 117) | def reset(self, num_classes: Optional[int] = None, pool_type: Optional...
    method _pool (line 126) | def _pool(self, x: torch.Tensor, H: int, W: int) -> torch.Tensor:
    method forward (line 134) | def forward(self, x, pre_logits: bool = False):
  class AttentionPool2d (line 174) | class AttentionPool2d(nn.Module):
    method __init__ (line 187) | def __init__(
    method init_weights (line 247) | def init_weights(self, zero_init_last: bool = False):
    method reset (line 262) | def reset(self, num_classes: Optional[int] = None, pool_type: Optional...
    method _pool (line 271) | def _pool(self, x: torch.Tensor, H: int, W: int) -> torch.Tensor:
    method forward (line 279) | def forward(self, x, pre_logits: bool = False):

FILE: timm/layers/blur_pool.py
  class BlurPool2d (line 20) | class BlurPool2d(nn.Module):
    method __init__ (line 33) | def __init__(
    method reset_parameters (line 57) | def reset_parameters(self) -> None:
    method _init_buffers (line 61) | def _init_buffers(self) -> None:
    method forward (line 74) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method init_non_persistent_buffers (line 84) | def init_non_persistent_buffers(self) -> None:
  function _normalize_aa_layer (line 89) | def _normalize_aa_layer(aa_layer: LayerType) -> Callable[..., nn.Module]:
  function _underlying_cls (line 104) | def _underlying_cls(layer_callable: Callable[..., nn.Module]):
  function _is_blurpool (line 111) | def _is_blurpool(layer_callable: Callable[..., nn.Module]) -> bool:
  function create_aa (line 122) | def create_aa(

FILE: timm/layers/bottleneck_attn.py
  function rel_logits_1d (line 28) | def rel_logits_1d(q, rel_k, permute_mask: List[int]):
  class PosEmbedRel (line 56) | class PosEmbedRel(nn.Module):
    method __init__ (line 61) | def __init__(
    method reset_parameters (line 80) | def reset_parameters(self):
    method forward (line 84) | def forward(self, q):
  class BottleneckAttn (line 100) | class BottleneckAttn(nn.Module):
    method __init__ (line 122) | def __init__(
    method reset_parameters (line 158) | def reset_parameters(self):
    method forward (line 163) | def forward(self, x):

FILE: timm/layers/cbam.py
  class ChannelAttn (line 21) | class ChannelAttn(nn.Module):
    method __init__ (line 24) | def __init__(
    method forward (line 45) | def forward(self, x):
  class LightChannelAttn (line 51) | class LightChannelAttn(ChannelAttn):
    method __init__ (line 54) | def __init__(
    method forward (line 69) | def forward(self, x):
  class SpatialAttn (line 75) | class SpatialAttn(nn.Module):
    method __init__ (line 78) | def __init__(
    method forward (line 89) | def forward(self, x):
  class LightSpatialAttn (line 95) | class LightSpatialAttn(nn.Module):
    method __init__ (line 98) | def __init__(
    method forward (line 109) | def forward(self, x):
  class CbamModule (line 115) | class CbamModule(nn.Module):
    method __init__ (line 116) | def __init__(
    method forward (line 143) | def forward(self, x):
  class LightCbamModule (line 149) | class LightCbamModule(nn.Module):
    method __init__ (line 150) | def __init__(
    method forward (line 177) | def forward(self, x):

FILE: timm/layers/classifier.py
  function _create_pool (line 18) | def _create_pool(
  function _create_fc (line 37) | def _create_fc(num_features, num_classes, use_conv=False, device=None, d...
  function create_classifier (line 47) | def create_classifier(
  class ClassifierHead (line 77) | class ClassifierHead(nn.Module):
    method __init__ (line 80) | def __init__(
    method reset (line 117) | def reset(self, num_classes: int, pool_type: Optional[str] = None):
    method forward (line 136) | def forward(self, x, pre_logits: bool = False):
  class NormMlpClassifierHead (line 145) | class NormMlpClassifierHead(nn.Module):
    method __init__ (line 148) | def __init__(
    method reset (line 194) | def reset(self, num_classes: int, pool_type: Optional[str] = None):
    method forward (line 211) | def forward(self, x, pre_logits: bool = False):
  class ClNormMlpClassifierHead (line 223) | class ClNormMlpClassifierHead(nn.Module):
    method __init__ (line 226) | def __init__(
    method reset (line 273) | def reset(self, num_classes: int, pool_type: Optional[str] = None, res...
    method _global_pool (line 282) | def _global_pool(self, x):
    method forward (line 292) | def forward(self, x, pre_logits: bool = False):

FILE: timm/layers/cond_conv2d.py
  function get_condconv_initializer (line 23) | def get_condconv_initializer(initializer, num_experts, expert_shape):
  class CondConv2d (line 37) | class CondConv2d(nn.Module):
    method __init__ (line 46) | def __init__(
    method reset_parameters (line 89) | def reset_parameters(self):
    method forward (line 100) | def forward(self, x, routing_weights):

FILE: timm/layers/config.py
  function is_no_jit (line 44) | def is_no_jit():
  class set_no_jit (line 48) | class set_no_jit:
    method __init__ (line 49) | def __init__(self, mode: bool) -> None:
    method __enter__ (line 54) | def __enter__(self) -> None:
    method __exit__ (line 57) | def __exit__(self, *args: Any) -> bool:
  function is_exportable (line 63) | def is_exportable():
  class set_exportable (line 67) | class set_exportable:
    method __init__ (line 68) | def __init__(self, mode: bool) -> None:
    method __enter__ (line 73) | def __enter__(self) -> None:
    method __exit__ (line 76) | def __exit__(self, *args: Any) -> bool:
  function is_scriptable (line 82) | def is_scriptable():
  class set_scriptable (line 86) | class set_scriptable:
    method __init__ (line 87) | def __init__(self, mode: bool) -> None:
    method __enter__ (line 92) | def __enter__(self) -> None:
    method __exit__ (line 95) | def __exit__(self, *args: Any) -> bool:
  class set_layer_config (line 101) | class set_layer_config:
    method __init__ (line 105) | def __init__(
    method __enter__ (line 125) | def __enter__(self) -> None:
    method __exit__ (line 128) | def __exit__(self, *args: Any) -> bool:
  function use_fused_attn (line 137) | def use_fused_attn(experimental: bool = False) -> bool:
  function set_fused_attn (line 146) | def set_fused_attn(enable: bool = True, experimental: bool = False):
  function use_reentrant_ckpt (line 159) | def use_reentrant_ckpt() -> bool:
  function set_reentrant_ckpt (line 163) | def set_reentrant_ckpt(enable: bool = True):

FILE: timm/layers/conv2d_same.py
  function conv2d_same (line 18) | def conv2d_same(
  class Conv2dSame (line 32) | class Conv2dSame(nn.Conv2d):
    method __init__ (line 36) | def __init__(
    method forward (line 62) | def forward(self, x):
  class Conv2dSameExport (line 74) | class Conv2dSameExport(nn.Conv2d):
    method __init__ (line 81) | def __init__(
    method forward (line 109) | def forward(self, x):
  function create_conv2d_pad (line 128) | def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs):

FILE: timm/layers/conv_bn_act.py
  class ConvNormAct (line 15) | class ConvNormAct(nn.Module):
    method __init__ (line 16) | def __init__(
    method in_channels (line 85) | def in_channels(self):
    method out_channels (line 89) | def out_channels(self):
    method forward (line 92) | def forward(self, x):

FILE: timm/layers/coord_attn.py
  class CoordAttn (line 23) | class CoordAttn(nn.Module):
    method __init__ (line 24) | def __init__(
    method forward (line 74) | def forward(self, x):
  class SimpleCoordAttn (line 101) | class SimpleCoordAttn(nn.Module):
    method __init__ (line 112) | def __init__(
    method forward (line 154) | def forward(self, x):
  class EfficientLocalAttn (line 176) | class EfficientLocalAttn(nn.Module):
    method __init__ (line 186) | def __init__(
    method forward (line 241) | def forward(self, x):
  class StripAttn (line 263) | class StripAttn(nn.Module):
    method __init__ (line 269) | def __init__(
    method forward (line 322) | def forward(self, x):

FILE: timm/layers/create_act.py
  function get_act_fn (line 92) | def get_act_fn(name: Optional[LayerType] = 'relu'):
  function get_act_layer (line 110) | def get_act_layer(name: Optional[LayerType] = 'relu'):
  function create_act_layer (line 129) | def create_act_layer(

FILE: timm/layers/create_attn.py
  function get_attn (line 22) | def get_attn(attn_type):
  function create_attn (line 93) | def create_attn(attn_type, channels, **kwargs):

FILE: timm/layers/create_conv2d.py
  function create_conv2d (line 11) | def create_conv2d(in_channels, out_channels, kernel_size, **kwargs):

FILE: timm/layers/create_norm.py
  function create_norm_layer (line 54) | def create_norm_layer(layer_name, num_features, **kwargs):
  function get_norm_layer (line 60) | def get_norm_layer(norm_layer):

FILE: timm/layers/create_norm_act.py
  function create_norm_act_layer (line 92) | def create_norm_act_layer(
  function get_norm_act_layer (line 107) | def get_norm_act_layer(

FILE: timm/layers/diff_attention.py
  class DiffAttention (line 21) | class DiffAttention(nn.Module):
    method __init__ (line 37) | def __init__(
    method set_lambda_init (line 107) | def set_lambda_init(self, depth: int):
    method reset_parameters (line 110) | def reset_parameters(self):
    method _compute_lambda (line 120) | def _compute_lambda(self) -> torch.Tensor:
    method forward (line 129) | def forward(

FILE: timm/layers/drop.py
  function drop_block_2d (line 24) | def drop_block_2d(
  class DropBlock2d (line 102) | class DropBlock2d(nn.Module):
    method __init__ (line 116) | def __init__(
    method forward (line 143) | def forward(self, x):
  function drop_path (line 158) | def drop_path(x, drop_prob: float = 0., training: bool = False, scale_by...
  class DropPath (line 178) | class DropPath(nn.Module):
    method __init__ (line 181) | def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True):
    method forward (line 186) | def forward(self, x):
    method extra_repr (line 189) | def extra_repr(self):
  function calculate_drop_path_rates (line 193) | def calculate_drop_path_rates(

FILE: timm/layers/eca.py
  class EcaModule (line 46) | class EcaModule(nn.Module):
    method __init__ (line 61) | def __init__(
    method forward (line 98) | def forward(self, x):
  class CecaModule (line 111) | class CecaModule(nn.Module):
    method __init__ (line 135) | def __init__(
    method forward (line 161) | def forward(self, x):

FILE: timm/layers/evo_norm.py
  function instance_std (line 36) | def instance_std(x, eps: float = 1e-5):
  function instance_std_tpu (line 41) | def instance_std_tpu(x, eps: float = 1e-5):
  function instance_rms (line 47) | def instance_rms(x, eps: float = 1e-5):
  function manual_var (line 52) | def manual_var(x, dim: Union[int, Sequence[int]], diff_sqm: bool = False):
  function group_std (line 62) | def group_std(x, groups: int = 32, eps: float = 1e-5, flatten: bool = Fa...
  function group_std_tpu (line 75) | def group_std_tpu(x, groups: int = 32, eps: float = 1e-5, diff_sqm: bool...
  function group_rms (line 90) | def group_rms(x, groups: int = 32, eps: float = 1e-5):
  class EvoNorm2dB0 (line 99) | class EvoNorm2dB0(nn.Module):
    method __init__ (line 100) | def __init__(
    method reset_parameters (line 122) | def reset_parameters(self):
    method forward (line 128) | def forward(self, x):
  class EvoNorm2dB1 (line 149) | class EvoNorm2dB1(nn.Module):
    method __init__ (line 150) | def __init__(
    method reset_parameters (line 171) | def reset_parameters(self):
    method forward (line 175) | def forward(self, x):
  class EvoNorm2dB2 (line 195) | class EvoNorm2dB2(nn.Module):
    method __init__ (line 196) | def __init__(
    method reset_parameters (line 217) | def reset_parameters(self):
    method forward (line 221) | def forward(self, x):
  class EvoNorm2dS0 (line 241) | class EvoNorm2dS0(nn.Module):
    method __init__ (line 242) | def __init__(
    method reset_parameters (line 268) | def reset_parameters(self):
    method forward (line 274) | def forward(self, x):
  class EvoNorm2dS0a (line 284) | class EvoNorm2dS0a(EvoNorm2dS0):
    method __init__ (line 285) | def __init__(
    method forward (line 306) | def forward(self, x):
  class EvoNorm2dS1 (line 318) | class EvoNorm2dS1(nn.Module):
    method __init__ (line 319) | def __init__(
    method reset_parameters (line 351) | def reset_parameters(self):
    method forward (line 355) | def forward(self, x):
  class EvoNorm2dS1a (line 364) | class EvoNorm2dS1a(EvoNorm2dS1):
    method __init__ (line 365) | def __init__(
    method forward (line 388) | def forward(self, x):
  class EvoNorm2dS2 (line 396) | class EvoNorm2dS2(nn.Module):
    method __init__ (line 397) | def __init__(
    method reset_parameters (line 428) | def reset_parameters(self):
    method forward (line 432) | def forward(self, x):
  class EvoNorm2dS2a (line 441) | class EvoNorm2dS2a(EvoNorm2dS2):
    method __init__ (line 442) | def __init__(
    method forward (line 465) | def forward(self, x):

FILE: timm/layers/fast_norm.py
  function get_autocast_dtype (line 33) | def get_autocast_dtype(device: str = 'cuda'):
  function is_autocast_enabled (line 45) | def is_autocast_enabled(device: str = 'cuda'):
  function is_fast_norm (line 57) | def is_fast_norm():
  function set_fast_norm (line 61) | def set_fast_norm(enable=True):
  function fast_group_norm (line 66) | def fast_group_norm(
  function fast_layer_norm (line 91) | def fast_layer_norm(
  function rms_norm (line 119) | def rms_norm(
  function fast_rms_norm (line 142) | def fast_rms_norm(
  function rms_norm2d (line 173) | def rms_norm2d(
  function fast_rms_norm2d (line 188) | def fast_rms_norm2d(
  function simple_norm (line 218) | def simple_norm(
  function fast_simple_norm (line 240) | def fast_simple_norm(

FILE: timm/layers/filter_response_norm.py
  function inv_instance_rms (line 16) | def inv_instance_rms(x, eps: float = 1e-5):
  class FilterResponseNormTlu2d (line 21) | class FilterResponseNormTlu2d(nn.Module):
    method __init__ (line 22) | def __init__(
    method reset_parameters (line 43) | def reset_parameters(self):
    method forward (line 49) | def forward(self, x):
  class FilterResponseNormAct2d (line 58) | class FilterResponseNormAct2d(nn.Module):
    method __init__ (line 59) | def __init__(
    method reset_parameters (line 84) | def reset_parameters(self):
    method forward (line 88) | def forward(self, x):

FILE: timm/layers/format.py
  class Format (line 7) | class Format(str, Enum):
  function get_spatial_dim (line 17) | def get_spatial_dim(fmt: FormatT):
  function get_channel_dim (line 38) | def get_channel_dim(fmt: FormatT):
  function nchw_to (line 57) | def nchw_to(x: torch.Tensor, fmt: Format):
  function nhwc_to (line 76) | def nhwc_to(x: torch.Tensor, fmt: Format):

FILE: timm/layers/gather_excite.py
  class GatherExcite (line 26) | class GatherExcite(nn.Module):
    method __init__ (line 29) | def __init__(
    method forward (line 85) | def forward(self, x):

FILE: timm/layers/global_context.py
  class GlobalContext (line 21) | class GlobalContext(nn.Module):
    method __init__ (line 23) | def __init__(
    method reset_parameters (line 60) | def reset_parameters(self):
    method forward (line 66) | def forward(self, x):

FILE: timm/layers/grid.py
  function ndgrid (line 6) | def ndgrid(*tensors) -> Tuple[torch.Tensor, ...]:
  function meshgrid (line 30) | def meshgrid(*tensors) -> Tuple[torch.Tensor, ...]:

FILE: timm/layers/grn.py
  class GlobalResponseNorm (line 18) | class GlobalResponseNorm(nn.Module):
    method __init__ (line 21) | def __init__(
    method forward (line 44) | def forward(self, x):

FILE: timm/layers/halo_attn.py
  function rel_logits_1d (line 30) | def rel_logits_1d(q, rel_k, permute_mask: List[int]):
  class PosEmbedRel (line 61) | class PosEmbedRel(nn.Module):
    method __init__ (line 67) | def __init__(
    method reset_parameters (line 94) | def reset_parameters(self):
    method forward (line 98) | def forward(self, q):
  class HaloAttn (line 114) | class HaloAttn(nn.Module):
    method __init__ (line 142) | def __init__(
    method reset_parameters (line 199) | def reset_parameters(self):
    method forward (line 206) | def forward(self, x):

FILE: timm/layers/helpers.py
  function _ntuple (line 10) | def _ntuple(n):
  function make_divisible (line 36) | def make_divisible(v, divisor=8, min_value=None, round_limit=.9):
  function extend_tuple (line 59) | def extend_tuple(x, n):

FILE: timm/layers/hybrid_embed.py
  class HybridEmbed (line 21) | class HybridEmbed(nn.Module):
    method __init__ (line 28) | def __init__(
    method _init_backbone (line 93) | def _init_backbone(
    method set_input_size (line 131) | def set_input_size(
    method feat_ratio (line 181) | def feat_ratio(self, as_scalar=True) -> Union[Tuple[int, int], int]:
    method dynamic_feat_size (line 191) | def dynamic_feat_size(self, img_size: Tuple[int, int]) -> Tuple[int, i...
    method set_grad_checkpointing (line 201) | def set_grad_checkpointing(self, enable: bool = True):
    method forward (line 207) | def forward(self, x):
  class HybridEmbedWithSize (line 224) | class HybridEmbedWithSize(HybridEmbed):
    method __init__ (line 228) | def __init__(
    method set_grad_checkpointing (line 257) | def set_grad_checkpointing(self, enable: bool = True):
    method forward (line 263) | def forward(self, x) -> Tuple[torch.Tensor, List[int]]:

FILE: timm/layers/inplace_abn.py
  function inplace_abn (line 10) | def inplace_abn(x, weight, bias, running_mean, running_var,
  function inplace_abn_sync (line 15) | def inplace_abn_sync(**kwargs):
  class InplaceAbn (line 22) | class InplaceAbn(nn.Module):
    method __init__ (line 43) | def __init__(
    method reset_parameters (line 86) | def reset_parameters(self):
    method forward (line 93) | def forward(self, x):

FILE: timm/layers/interpolate.py
  class RegularGridInterpolator (line 10) | class RegularGridInterpolator:
    method __init__ (line 18) | def __init__(self, points, values):
    method __call__ (line 34) | def __call__(self, points_to_interp):

FILE: timm/layers/lambda_layer.py
  function rel_pos_indices (line 34) | def rel_pos_indices(size, device=None):
  class LambdaLayer (line 46) | class LambdaLayer(nn.Module):
    method __init__ (line 73) | def __init__(
    method reset_parameters (line 131) | def reset_parameters(self) -> None:
    method _init_buffers (line 140) | def _init_buffers(self) -> None:
    method forward (line 147) | def forward(self, x):
    method init_non_persistent_buffers (line 173) | def init_non_persistent_buffers(self) -> None:

FILE: timm/layers/layer_scale.py
  class LayerScale (line 5) | class LayerScale(nn.Module):
    method __init__ (line 8) | def __init__(
    method reset_parameters (line 23) | def reset_parameters(self):
    method forward (line 26) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LayerScale2d (line 30) | class LayerScale2d(nn.Module):
    method __init__ (line 33) | def __init__(
    method reset_parameters (line 48) | def reset_parameters(self):
    method forward (line 51) | def forward(self, x):

FILE: timm/layers/linear.py
  class Linear (line 8) | class Linear(nn.Linear):
    method forward (line 14) | def forward(self, input: torch.Tensor) -> torch.Tensor:

FILE: timm/layers/median_pool.py
  class MedianPool2d (line 9) | class MedianPool2d(nn.Module):
    method __init__ (line 18) | def __init__(self, kernel_size=3, stride=1, padding=0, same=False):
    method _padding (line 25) | def _padding(self, x):
    method forward (line 45) | def forward(self, x):

FILE: timm/layers/mixed_conv2d.py
  function _split_channels (line 15) | def _split_channels(num_chan, num_groups):
  class MixedConv2d (line 21) | class MixedConv2d(nn.ModuleDict):
    method __init__ (line 27) | def __init__(
    method forward (line 64) | def forward(self, x):

FILE: timm/layers/ml_decoder.py
  function add_ml_decoder_head (line 9) | def add_ml_decoder_head(model):
  class TransformerDecoderLayerOptimal (line 35) | class TransformerDecoderLayerOptimal(nn.Module):
    method __init__ (line 36) | def __init__(self, d_model, nhead=8, dim_feedforward=2048, dropout=0.1...
    method __setstate__ (line 56) | def __setstate__(self, state):
    method forward (line 61) | def forward(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tens...
  class MLDecoder (line 90) | class MLDecoder(nn.Module):
    method __init__ (line 91) | def __init__(self, num_classes, num_of_groups=-1, decoder_embedding=76...
    method forward (line 123) | def forward(self, x):

FILE: timm/layers/mlp.py
  class Mlp (line 14) | class Mlp(nn.Module):
    method __init__ (line 19) | def __init__(
    method forward (line 47) | def forward(self, x):
  class GluMlp (line 57) | class GluMlp(nn.Module):
    method __init__ (line 63) | def __init__(
    method init_weights (line 95) | def init_weights(self):
    method forward (line 101) | def forward(self, x):
  class SwiGLU (line 115) | class SwiGLU(nn.Module):
    method __init__ (line 120) | def __init__(
    method init_weights (line 151) | def init_weights(self):
    method forward (line 157) | def forward(self, x):
  class GatedMlp (line 168) | class GatedMlp(nn.Module):
    method __init__ (line 171) | def __init__(
    method forward (line 204) | def forward(self, x):
  class ConvMlp (line 215) | class ConvMlp(nn.Module):
    method __init__ (line 218) | def __init__(
    method forward (line 242) | def forward(self, x):
  class GlobalResponseNormMlp (line 251) | class GlobalResponseNormMlp(nn.Module):
    method __init__ (line 256) | def __init__(
    method forward (line 283) | def forward(self, x):

FILE: timm/layers/non_local_attn.py
  class NonLocalAttn (line 19) | class NonLocalAttn(nn.Module):
    method __init__ (line 26) | def __init__(
    method forward (line 49) | def forward(self, x):
    method reset_parameters (line 71) | def reset_parameters(self):
  class BilinearAttnTransform (line 87) | class BilinearAttnTransform(nn.Module):
    method __init__ (line 89) | def __init__(
    method resize_mat (line 109) | def resize_mat(self, x, t: int):
    method forward (line 122) | def forward(self, x):
  class BatNonLocalAttn (line 148) | class BatNonLocalAttn(nn.Module):
    method __init__ (line 153) | def __init__(
    method forward (line 184) | def forward(self, x):

FILE: timm/layers/norm.py
  class GroupNorm (line 31) | class GroupNorm(nn.GroupNorm):
    method __init__ (line 34) | def __init__(
    method forward (line 46) | def forward(self, x):
  class GroupNorm1 (line 53) | class GroupNorm1(nn.GroupNorm):
    method __init__ (line 59) | def __init__(self, num_channels: int, **kwargs):
    method forward (line 63) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LayerNorm (line 70) | class LayerNorm(nn.LayerNorm):
    method __init__ (line 75) | def __init__(
    method forward (line 85) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LayerNormFp32 (line 93) | class LayerNormFp32(nn.LayerNorm):
    method __init__ (line 97) | def __init__(
    method forward (line 106) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LayerNorm2d (line 113) | class LayerNorm2d(nn.LayerNorm):
    method __init__ (line 117) | def __init__(
    method forward (line 127) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LayerNorm2dFp32 (line 137) | class LayerNorm2dFp32(nn.LayerNorm):
    method __init__ (line 140) | def __init__(
    method forward (line 149) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _is_contiguous (line 158) | def _is_contiguous(tensor: torch.Tensor) -> bool:
  function _layer_norm_cf (line 166) | def _layer_norm_cf(x: torch.Tensor, weight: torch.Tensor, bias: torch.Te...
  function _layer_norm_cf_sqm (line 173) | def _layer_norm_cf_sqm(x: torch.Tensor, weight: torch.Tensor, bias: torc...
  class LayerNormExp2d (line 181) | class LayerNormExp2d(nn.LayerNorm):
    method __init__ (line 190) | def __init__(self, num_channels: int, eps: float = 1e-6):
    method forward (line 193) | def forward(self, x) -> torch.Tensor:
  class RmsNorm (line 202) | class RmsNorm(nn.Module):
    method __init__ (line 211) | def __init__(
    method reset_parameters (line 237) | def reset_parameters(self) -> None:
    method forward (line 241) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class RmsNormFp32 (line 251) | class RmsNormFp32(nn.Module):
    method __init__ (line 259) | def __init__(
    method reset_parameters (line 284) | def reset_parameters(self) -> None:
    method forward (line 288) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class RmsNorm2d (line 294) | class RmsNorm2d(nn.Module):
    method __init__ (line 307) | def __init__(
    method reset_parameters (line 333) | def reset_parameters(self) -> None:
    method forward (line 337) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class RmsNorm2dFp32 (line 347) | class RmsNorm2dFp32(nn.Module):
    method __init__ (line 359) | def __init__(
    method reset_parameters (line 384) | def reset_parameters(self) -> None:
    method forward (line 388) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SimpleNorm (line 394) | class SimpleNorm(nn.Module):
    method __init__ (line 403) | def __init__(
    method reset_parameters (line 429) | def reset_parameters(self) -> None:
    method forward (line 433) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SimpleNormFp32 (line 441) | class SimpleNormFp32(nn.Module):
    method __init__ (line 449) | def __init__(
    method reset_parameters (line 474) | def reset_parameters(self) -> None:
    method forward (line 478) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SimpleNorm2d (line 484) | class SimpleNorm2d(nn.Module):
    method __init__ (line 493) | def __init__(
    method reset_parameters (line 519) | def reset_parameters(self) -> None:
    method forward (line 523) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SimpleNorm2dFp32 (line 533) | class SimpleNorm2dFp32(nn.Module):
    method __init__ (line 541) | def __init__(
    method reset_parameters (line 566) | def reset_parameters(self) -> None:
    method forward (line 570) | def forward(self, x: torch.Tensor) -> torch.Tensor:

FILE: timm/layers/norm_act.py
  function _create_act (line 42) | def _create_act(
  class BatchNormAct2d (line 57) | class BatchNormAct2d(nn.BatchNorm2d):
    method __init__ (line 64) | def __init__(
    method forward (line 101) | def forward(self, x):
  class SyncBatchNormAct (line 153) | class SyncBatchNormAct(nn.SyncBatchNorm):
    method forward (line 158) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function convert_sync_batchnorm (line 167) | def convert_sync_batchnorm(module, process_group=None):
  class FrozenBatchNormAct2d (line 211) | class FrozenBatchNormAct2d(torch.nn.Module):
    method __init__ (line 220) | def __init__(
    method _load_from_state_dict (line 243) | def _load_from_state_dict(
    method forward (line 261) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method __repr__ (line 274) | def __repr__(self) -> str:
  function freeze_batch_norm_2d (line 278) | def freeze_batch_norm_2d(module):
  function unfreeze_batch_norm_2d (line 322) | def unfreeze_batch_norm_2d(module):
  function _num_groups (line 363) | def _num_groups(num_channels: int, num_groups: int, group_size: int):
  class GroupNormAct (line 370) | class GroupNormAct(nn.GroupNorm):
    method __init__ (line 374) | def __init__(
    method forward (line 402) | def forward(self, x):
  class GroupNorm1Act (line 412) | class GroupNorm1Act(nn.GroupNorm):
    method __init__ (line 415) | def __init__(
    method forward (line 434) | def forward(self, x):
  class LayerNormAct (line 444) | class LayerNormAct(nn.LayerNorm):
    method __init__ (line 447) | def __init__(
    method forward (line 465) | def forward(self, x):
  class LayerNormActFp32 (line 475) | class LayerNormActFp32(nn.LayerNorm):
    method __init__ (line 477) | def __init__(
    method forward (line 493) | def forward(self, x):
  class LayerNormAct2d (line 502) | class LayerNormAct2d(nn.LayerNorm):
    method __init__ (line 505) | def __init__(
    method forward (line 522) | def forward(self, x):
  class LayerNormAct2dFp32 (line 534) | class LayerNormAct2dFp32(nn.LayerNorm):
    method __init__ (line 536) | def __init__(
    method forward (line 552) | def forward(self, x):
  class RmsNormAct (line 563) | class RmsNormAct(RmsNorm):
    method __init__ (line 570) | def __init__(
    method forward (line 587) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class RmsNormActFp32 (line 597) | class RmsNormActFp32(RmsNorm):
    method __init__ (line 604) | def __init__(
    method forward (line 620) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class RmsNormAct2d (line 628) | class RmsNormAct2d(RmsNorm2d):
    method __init__ (line 635) | def __init__(
    method forward (line 652) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class RmsNormAct2dFp32 (line 662) | class RmsNormAct2dFp32(RmsNorm2d):
    method __init__ (line 669) | def __init__(
    method forward (line 685) | def forward(self, x: torch.Tensor) -> torch.Tensor:

FILE: timm/layers/other_pool.py
  class LsePlus2d (line 24) | class LsePlus2d(nn.Module):
    method __init__ (line 36) | def __init__(
    method forward (line 57) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class LsePlus1d (line 67) | class LsePlus1d(nn.Module):
    method __init__ (line 75) | def __init__(
    method forward (line 93) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SimPool2d (line 102) | class SimPool2d(nn.Module):
    method __init__ (line 113) | def __init__(
    method forward (line 154) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SimPool1d (line 198) | class SimPool1d(nn.Module):
    method __init__ (line 209) | def __init__(
    method forward (line 249) | def forward(self, x: torch.Tensor) -> torch.Tensor:

FILE: timm/layers/padding.py
  function get_padding (line 15) | def get_padding(kernel_size: int, stride: int = 1, dilation: int = 1, **...
  function get_same_padding (line 24) | def get_same_padding(x: int, kernel_size: int, stride: int, dilation: int):
  function is_static_pad (line 32) | def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, ...
  function pad_same_arg (line 39) | def pad_same_arg(
  function pad_same (line 53) | def pad_same(
  function get_padding_value (line 67) | def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bo...

FILE: timm/layers/patch_dropout.py
  function patch_dropout_forward (line 7) | def patch_dropout_forward(
  class PatchDropout (line 53) | class PatchDropout(nn.Module):
    method __init__ (line 59) | def __init__(
    method forward (line 71) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class PatchDropoutWithIndices (line 82) | class PatchDropoutWithIndices(nn.Module):
    method __init__ (line 88) | def __init__(
    method forward (line 100) | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, Optional[tor...

FILE: timm/layers/patch_embed.py
  class PatchEmbed (line 26) | class PatchEmbed(nn.Module):
    method __init__ (line 32) | def __init__(
    method _init_img_size (line 65) | def _init_img_size(self, img_size: Union[int, Tuple[int, int]]):
    method set_input_size (line 74) | def set_input_size(
    method feat_ratio (line 102) | def feat_ratio(self, as_scalar=True) -> Union[Tuple[int, int], int]:
    method dynamic_feat_size (line 108) | def dynamic_feat_size(self, img_size: Tuple[int, int]) -> Tuple[int, i...
    method forward (line 117) | def forward(self, x):
  class PatchEmbedWithSize (line 145) | class PatchEmbedWithSize(PatchEmbed):
    method __init__ (line 150) | def __init__(
    method forward (line 176) | def forward(self, x) -> Tuple[torch.Tensor, List[int]]:
  function resample_patch_embed_old (line 193) | def resample_patch_embed_old(
  function _compute_resize_matrix (line 266) | def _compute_resize_matrix(
  function _apply_resampling (line 293) | def _apply_resampling(
  function resample_patch_embed (line 311) | def resample_patch_embed(
  class PatchEmbedResamplerFixedOrigSize (line 341) | class PatchEmbedResamplerFixedOrigSize(nn.Module):
    method __init__ (line 346) | def __init__(
    method _get_or_create_pinv_matrix (line 367) | def _get_or_create_pinv_matrix(
    method forward (line 397) | def forward(self, patch_embed: torch.Tensor, new_size: List[int]) -> t...
  class PatchEmbedInterpolator (line 434) | class PatchEmbedInterpolator(nn.Module):
    method __init__ (line 449) | def __init__(
    method resample_linear_weight (line 464) | def resample_linear_weight(
    method resample_conv_weight (line 506) | def resample_conv_weight(
    method forward (line 534) | def forward(

FILE: timm/layers/pool1d.py
  function global_pool_nlc (line 4) | def global_pool_nlc(

FILE: timm/layers/pool2d_same.py
  function avg_pool2d_same (line 15) | def avg_pool2d_same(
  class AvgPool2dSame (line 29) | class AvgPool2dSame(nn.AvgPool2d):
    method __init__ (line 32) | def __init__(
    method forward (line 44) | def forward(self, x):
  function max_pool2d_same (line 50) | def max_pool2d_same(
  class MaxPool2dSame (line 63) | class MaxPool2dSame(nn.MaxPool2d):
    method __init__ (line 66) | def __init__(
    method forward (line 79) | def forward(self, x):
  function create_pool2d (line 84) | def create_pool2d(pool_type, kernel_size, stride=None, **kwargs):

FILE: timm/layers/pos_embed.py
  function resample_abs_pos_embed (line 19) | def resample_abs_pos_embed(
  function resample_abs_pos_embed_nhwc (line 64) | def resample_abs_pos_embed_nhwc(

FILE: timm/layers/pos_embed_rel.py
  function gen_relative_position_index (line 21) | def gen_relative_position_index(
  function resize_rel_pos_bias_table_simple (line 77) | def resize_rel_pos_bias_table_simple(
  function resize_rel_pos_bias_table_levit (line 124) | def resize_rel_pos_bias_table_levit(
  function resize_rel_pos_bias_table (line 156) | def resize_rel_pos_bias_table(
  class RelPosBias (line 272) | class RelPosBias(nn.Module):
    method __init__ (line 277) | def __init__(
    method reset_parameters (line 305) | def reset_parameters(self) -> None:
    method _init_buffers (line 310) | def _init_buffers(self) -> None:
    method get_bias (line 320) | def get_bias(self) -> torch.Tensor:
    method forward (line 326) | def forward(self, attn, shared_rel_pos: Optional[torch.Tensor] = None):
    method init_non_persistent_buffers (line 329) | def init_non_persistent_buffers(self) -> None:
  function gen_relative_log_coords (line 334) | def gen_relative_log_coords(
  class RelPosMlp (line 365) | class RelPosMlp(nn.Module):
    method __init__ (line 371) | def __init__(
    method get_bias (line 426) | def get_bias(self) -> torch.Tensor:
    method forward (line 439) | def forward(self, attn, shared_rel_pos: Optional[torch.Tensor] = None):
    method reset_parameters (line 442) | def reset_parameters(self) -> None:
    method _init_buffers (line 446) | def _init_buffers(self) -> None:
    method init_non_persistent_buffers (line 463) | def init_non_persistent_buffers(self) -> None:
  function generate_lookup_tensor (line 468) | def generate_lookup_tensor(
  function reindex_2d_einsum_lookup (line 499) | def reindex_2d_einsum_lookup(
  class RelPosBiasTf (line 528) | class RelPosBiasTf(nn.Module):
    method __init__ (line 533) | def __init__(
    method reset_parameters (line 560) | def reset_parameters(self) -> None:
    method _init_buffers (line 565) | def _init_buffers(self) -> None:
    method get_bias (line 572) | def get_bias(self) -> torch.Tensor:
    method forward (line 582) | def forward(self, attn, shared_rel_pos: Optional[torch.Tensor] = None):
    method init_non_persistent_buffers (line 585) | def init_non_persistent_buffers(self) -> None:

FILE: timm/layers/pos_embed_sincos.py
  function pixel_freq_bands (line 15) | def pixel_freq_bands(
  function freq_bands (line 28) | def freq_bands(
  function build_sincos2d_pos_embed (line 39) | def build_sincos2d_pos_embed(
  function swap_shape_xy (line 80) | def swap_shape_xy(seq: List[int]) -> List[int]:
  function build_fourier_pos_embed (line 86) | def build_fourier_pos_embed(
  class FourierEmbed (line 171) | class FourierEmbed(nn.Module):
    method __init__ (line 173) | def __init__(
    method reset_parameters (line 192) | def reset_parameters(self) -> None:
    method _init_buffers (line 196) | def _init_buffers(self) -> None:
    method init_non_persistent_buffers (line 200) | def init_non_persistent_buffers(self) -> None:
    method forward (line 204) | def forward(self, x):
  function rot (line 228) | def rot(x):
  function rope_rotate_half (line 234) | def rope_rotate_half(x: torch.Tensor) -> torch.Tensor:
  function apply_rot_embed (line 241) | def apply_rot_embed(
  function apply_rot_embed_list (line 260) | def apply_rot_embed_list(
  function apply_rot_embed_cat (line 281) | def apply_rot_embed_cat(
  function apply_keep_indices_nlc (line 300) | def apply_keep_indices_nlc(
  function build_rotary_pos_embed (line 337) | def build_rotary_pos_embed(
  class RotaryEmbedding (line 393) | class RotaryEmbedding(nn.Module):
    method __init__ (line 404) | def __init__(
    method reset_parameters (line 451) | def reset_parameters(self) -> None:
    method _init_buffers (line 455) | def _init_buffers(self) -> None:
    method _compute_bands (line 464) | def _compute_bands(self, device=None, dtype=None):
    method _get_pos_embed_values (line 480) | def _get_pos_embed_values(self, feat_shape: List[int], device=None, dt...
    method init_non_persistent_buffers (line 496) | def init_non_persistent_buffers(self) -> None:
    method update_feat_shape (line 500) | def update_feat_shape(self, feat_shape: List[int]):
    method get_embed (line 512) | def get_embed(self, shape: Optional[List[int]] = None):
    method forward (line 528) | def forward(self, x):
  class RotaryEmbeddingCat (line 534) | class RotaryEmbeddingCat(nn.Module):
    method __init__ (line 542) | def __init__(
    method reset_parameters (line 587) | def reset_parameters(self) -> None:
    method _init_buffers (line 591) | def _init_buffers(self) -> None:
    method _compute_bands (line 598) | def _compute_bands(self, device=None, dtype=None):
    method _get_pos_embed_values (line 614) | def _get_pos_embed_values(self, feat_shape: List[int], device=None, dt...
    method init_non_persistent_buffers (line 630) | def init_non_persistent_buffers(self) -> None:
    method update_feat_shape (line 634) | def update_feat_shape(self, feat_shape: List[int]):
    method get_embed (line 645) | def get_embed(self, shape: Optional[List[int]] = None):
    method get_batch_embeds (line 662) | def get_batch_embeds(
    method forward (line 715) | def forward(self, x):
  function init_random_2d_freqs (line 721) | def init_random_2d_freqs(
  function get_mixed_grid (line 757) | def get_mixed_grid(
  function get_mixed_freqs (line 775) | def get_mixed_freqs(
  class RotaryEmbeddingMixed (line 793) | class RotaryEmbeddingMixed(nn.Module):
    method __init__ (line 802) | def __init__(
    method _init_buffers (line 857) | def _init_buffers(self) -> None:
    method reset_parameters (line 864) | def reset_parameters(self) -> None:
    method _get_grid_values (line 868) | def _get_grid_values(self, feat_shape: Optional[List[int]]):
    method update_feat_shape (line 876) | def update_feat_shape(self, feat_shape: Optional[List[int]]):
    method init_non_persistent_buffers (line 885) | def init_non_persistent_buffers(self) -> None:
    method get_embed (line 889) | def get_embed(self, shape: Optional[List[int]] = None) -> torch.Tensor:
    method get_batch_embeds (line 911) | def get_batch_embeds(
    method forward (line 967) | def forward(self, x):
    method no_weight_decay (line 972) | def no_weight_decay(self):
  function make_coords_dinov3 (line 979) | def make_coords_dinov3(
  class RotaryEmbeddingDinoV3 (line 1027) | class RotaryEmbeddingDinoV3(nn.Module):
    method __init__ (line 1036) | def __init__(
    method reset_parameters (line 1091) | def reset_parameters(self) -> None:
    method _init_buffers (line 1095) | def _init_buffers(self) -> None:
    method _compute_periods (line 1102) | def _compute_periods(self, device: torch.device = 'cpu', dtype: torch....
    method _apply_coord_augs (line 1119) | def _apply_coord_augs(self, coords: torch.Tensor) -> torch.Tensor:
    method _get_pos_embed_from_coords (line 1153) | def _get_pos_embed_from_coords(self, coords: torch.Tensor) -> Tuple[to...
    method _create_embed (line 1177) | def _create_embed(
    method _cache_embed (line 1195) | def _cache_embed(self, feat_shape: List[int]):
    method update_feat_shape (line 1201) | def update_feat_shape(self, feat_shape: List[int]):
    method init_non_persistent_buffers (line 1206) | def init_non_persistent_buffers(self) -> None:
    method get_embed (line 1210) | def get_embed(self, shape: Optional[List[int]] = None) -> torch.Tensor:
    method forward (line 1228) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function create_rope_embed (line 1235) | def create_rope_embed(

FILE: timm/layers/selective_kernel.py
  function _kernel_valid (line 17) | def _kernel_valid(k):
  class SelectiveKernelAttn (line 24) | class SelectiveKernelAttn(nn.Module):
    method __init__ (line 25) | def __init__(
    method forward (line 48) | def forward(self, x):
  class SelectiveKernel (line 61) | class SelectiveKernel(nn.Module):
    method __init__ (line 63) | def __init__(
    method forward (line 139) | def forward(self, x):

FILE: timm/layers/separable_conv.py
  class SeparableConvNormAct (line 16) | class SeparableConvNormAct(nn.Module):
    method __init__ (line 19) | def __init__(
    method in_channels (line 65) | def in_channels(self):
    method out_channels (line 69) | def out_channels(self):
    method forward (line 72) | def forward(self, x):
  class SeparableConv2d (line 82) | class SeparableConv2d(nn.Module):
    method __init__ (line 85) | def __init__(
    method in_channels (line 123) | def in_channels(self):
    method out_channels (line 127) | def out_channels(self):
    method forward (line 130) | def forward(self, x):

FILE: timm/layers/space_to_depth.py
  class SpaceToDepth (line 5) | class SpaceToDepth(nn.Module):
    method __init__ (line 16) | def __init__(self, block_size: int = 4):
    method forward (line 21) | def forward(self, x):
  class DepthToSpace (line 29) | class DepthToSpace(nn.Module):
    method __init__ (line 39) | def __init__(self, block_size):
    method forward (line 43) | def forward(self, x):

FILE: timm/layers/split_attn.py
  class RadixSoftmax (line 18) | class RadixSoftmax(nn.Module):
    method __init__ (line 19) | def __init__(self, radix: int, cardinality: int):
    method forward (line 24) | def forward(self, x):
  class SplitAttn (line 35) | class SplitAttn(nn.Module):
    method __init__ (line 38) | def __init__(
    method forward (line 89) | def forward(self, x):

FILE: timm/layers/split_batchnorm.py
  class SplitBatchNorm2d (line 18) | class SplitBatchNorm2d(torch.nn.BatchNorm2d):
    method __init__ (line 20) | def __init__(
    method forward (line 40) | def forward(self, input: torch.Tensor):
  function convert_splitbn_model (line 53) | def convert_splitbn_model(module, num_splits=2):

FILE: timm/layers/squeeze_excite.py
  class SEModule (line 21) | class SEModule(nn.Module):
    method __init__ (line 30) | def __init__(
    method forward (line 55) | def forward(self, x):
  class EffectiveSEModule (line 69) | class EffectiveSEModule(nn.Module):
    method __init__ (line 73) | def __init__(
    method forward (line 88) | def forward(self, x):
  class SqueezeExciteCl (line 100) | class SqueezeExciteCl(nn.Module):
    method __init__ (line 109) | def __init__(
    method forward (line 130) | def forward(self, x):

FILE: timm/layers/std_conv.py
  class StdConv2d (line 29) | class StdConv2d(nn.Conv2d):
    method __init__ (line 35) | def __init__(
    method forward (line 56) | def forward(self, x):
  class StdConv2dSame (line 70) | class StdConv2dSame(nn.Conv2d):
    method __init__ (line 76) | def __init__(
    method forward (line 97) | def forward(self, x):
  class ScaledStdConv2d (line 112) | class ScaledStdConv2d(nn.Conv2d):
    method __init__ (line 121) | def __init__(
    method reset_parameters (line 151) | def reset_parameters(self) -> None:
    method forward (line 159) | def forward(self, x):
  class ScaledStdConv2dSame (line 173) | class ScaledStdConv2dSame(nn.Conv2d):
    method __init__ (line 182) | def __init__(
    method reset_parameters (line 212) | def reset_parameters(self) -> None:
    method forward (line 220) | def forward(self, x):

FILE: timm/layers/test_time_pool.py
  class TestTimePoolHead (line 16) | class TestTimePoolHead(nn.Module):
    method __init__ (line 17) | def __init__(self, base, original_pool=7):
    method forward (line 31) | def forward(self, x):
  function apply_test_time_pool (line 39) | def apply_test_time_pool(model, config, use_test_size=False):

FILE: timm/layers/trace_utils.py
  function _assert (line 4) | def _assert(condition: bool, message: str):
  function _float_to_int (line 8) | def _float_to_int(x: float) -> int:

FILE: timm/layers/typing.py
  function nullwrap (line 17) | def nullwrap(fn: F) -> F: ...  # decorator form
  function nullwrap (line 20) | def nullwrap(fn: None = ...) -> ContextManager: ...  # context‑manager form
  function nullwrap (line 22) | def nullwrap(fn: Optional[F] = None):

FILE: timm/layers/weight_init.py
  function is_meta_device (line 8) | def is_meta_device(device) -> bool:
  function _trunc_normal_ (line 19) | def _trunc_normal_(tensor, mean, std, a, b):
  function trunc_normal_ (line 54) | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
  function trunc_normal_tf_ (line 81) | def trunc_normal_tf_(tensor, mean=0., std=1., a=-2., b=2.):
  function variance_scaling_ (line 110) | def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='no...
  function lecun_normal_ (line 135) | def lecun_normal_(tensor):
  function init_weight_vit (line 139) | def init_weight_vit(
  function init_weight_jax (line 158) | def init_weight_jax(

FILE: timm/loss/asymmetric_loss.py
  class AsymmetricLossMultiLabel (line 5) | class AsymmetricLossMultiLabel(nn.Module):
    method __init__ (line 6) | def __init__(self, gamma_neg=4, gamma_pos=1, clip=0.05, eps=1e-8, disa...
    method forward (line 15) | def forward(self, x, y):
  class AsymmetricLossSingleLabel (line 53) | class AsymmetricLossSingleLabel(nn.Module):
    method __init__ (line 54) | def __init__(self, gamma_pos=1, gamma_neg=4, eps: float = 0.1, reducti...
    method forward (line 64) | def forward(self, inputs, target, reduction=None):

FILE: timm/loss/binary_cross_entropy.py
  class BinaryCrossEntropy (line 12) | class BinaryCrossEntropy(nn.Module):
    method __init__ (line 16) | def __init__(
    method forward (line 37) | def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:

FILE: timm/loss/cross_entropy.py
  class LabelSmoothingCrossEntropy (line 11) | class LabelSmoothingCrossEntropy(nn.Module):
    method __init__ (line 14) | def __init__(self, smoothing=0.1):
    method forward (line 20) | def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
  class SoftTargetCrossEntropy (line 29) | class SoftTargetCrossEntropy(nn.Module):
    method __init__ (line 31) | def __init__(self):
    method forward (line 34) | def forward(self, x: torch.Tensor, target: torch.Tensor) -> torch.Tensor:

FILE: timm/loss/jsd.py
  class JsdCrossEntropy (line 8) | class JsdCrossEntropy(nn.Module):
    method __init__ (line 17) | def __init__(self, num_splits=3, alpha=12, smoothing=0.1):
    method __call__ (line 26) | def __call__(self, output, target):

FILE: timm/models/_builder.py
  function _resolve_pretrained_source (line 43) | def _resolve_pretrained_source(pretrained_cfg: Dict[str, Any]) -> Tuple[...
  function set_pretrained_download_progress (line 91) | def set_pretrained_download_progress(enable: bool = True) -> None:
  function set_pretrained_check_hash (line 97) | def set_pretrained_check_hash(enable: bool = True) -> None:
  function load_custom_pretrained (line 103) | def load_custom_pretrained(
  function load_pretrained (line 152) | def load_pretrained(
  function pretrained_cfg_for_features (line 291) | def pretrained_cfg_for_features(pretrained_cfg: Dict[str, Any]) -> Dict[...
  function _filter_kwargs (line 300) | def _filter_kwargs(kwargs: Dict[str, Any], names: List[str]) -> None:
  function _update_default_model_kwargs (line 307) | def _update_default_model_kwargs(pretrained_cfg, kwargs, kwargs_filter) ...
  function resolve_pretrained_cfg (line 348) | def resolve_pretrained_cfg(
  function build_model_with_cfg (line 384) | def build_model_with_cfg(

FILE: timm/models/_efficientnet_blocks.py
  function num_groups (line 34) | def num_groups(group_size: Optional[int], channels: int):
  class SqueezeExcite (line 43) | class SqueezeExcite(nn.Module):
    method __init__ (line 55) | def __init__(
    method forward (line 78) | def forward(self, x):
  class ConvBnAct (line 86) | class ConvBnAct(nn.Module):
    method __init__ (line 89) | def __init__(
    method feature_info (line 127) | def feature_info(self, location):
    method forward (line 133) | def forward(self, x):
  class DepthwiseSeparableConv (line 143) | class DepthwiseSeparableConv(nn.Module):
    method __init__ (line 148) | def __init__(
    method feature_info (line 212) | def feature_info(self, location):
    method forward (line 218) | def forward(self, x):
  class InvertedResidual (line 234) | class InvertedResidual(nn.Module):
    method __init__ (line 244) | def __init__(
    method feature_info (line 318) | def feature_info(self, location):
    method forward (line 324) | def forward(self, x):
  class UniversalInvertedResidual (line 342) | class UniversalInvertedResidual(nn.Module):
    method __init__ (line 349) | def __init__(
    method feature_info (line 469) | def feature_info(self, location):
    method forward (line 475) | def forward(self, x):
  class MobileAttention (line 489) | class MobileAttention(nn.Module):
    method __init__ (line 495) | def __init__(
    method feature_info (line 591) | def feature_info(self, location):
    method forward (line 597) | def forward(self, x):
  class CondConvResidual (line 612) | class CondConvResidual(InvertedResidual):
    method __init__ (line 615) | def __init__(
    method forward (line 662) | def forward(self, x):
  class EdgeResidual (line 678) | class EdgeResidual(nn.Module):
    method __init__ (line 690) | def __init__(
    method feature_info (line 745) | def feature_info(self, location):
    method forward (line 751) | def forward(self, x):

FILE: timm/models/_efficientnet_builder.py
  function get_bn_args_tf (line 43) | def get_bn_args_tf():
  function resolve_bn_args (line 47) | def resolve_bn_args(kwargs):
  function resolve_act_layer (line 58) | def resolve_act_layer(kwargs, default='relu'):
  function round_channels (line 62) | def round_channels(channels, multiplier=1.0, divisor=8, channel_min=None...
  function _log_info_if (line 69) | def _log_info_if(msg, condition):
  function _parse_ksize (line 74) | def _parse_ksize(ss):
  function _decode_block_str (line 81) | def _decode_block_str(block_str):
  function _scale_stage_depth (line 232) | def _scale_stage_depth(stack_args, repeats, depth_multiplier=1.0, depth_...
  function decode_arch_def (line 270) | def decode_arch_def(
  class EfficientNetBuilder (line 316) | class EfficientNetBuilder:
    method __init__ (line 325) | def __init__(
    method _make_block (line 369) | def _make_block(self, ba, block_idx, block_count):
    method __call__ (line 433) | def __call__(self, in_chs, model_block_args):
  function _init_weight_goog (line 532) | def _init_weight_goog(m, n='', fix_group_fanout=True):
  function efficientnet_init_weights (line 573) | def efficientnet_init_weights(model: nn.Module, init_fn=None):

FILE: timm/models/_factory.py
  function parse_model_name (line 18) | def parse_model_name(model_name: str) -> Tuple[Optional[str], str]:
  function safe_model_name (line 35) | def safe_model_name(model_name: str, remove_source: bool = True) -> str:
  function create_model (line 44) | def create_model(

FILE: timm/models/_features.py
  function feature_take_indices (line 28) | def feature_take_indices(
  function _out_indices_as_tuple (line 69) | def _out_indices_as_tuple(x: Union[int, Tuple[int, ...]]) -> Tuple[int, ...
  class FeatureInfo (line 79) | class FeatureInfo:
    method __init__ (line 81) | def __init__(
    method from_other (line 98) | def from_other(self, out_indices: OutIndicesT):
    method get (line 102) | def get(self, key: str, idx: Optional[Union[int, List[int]]] = None):
    method get_dicts (line 115) | def get_dicts(self, keys: Optional[List[str]] = None, idx: Optional[Un...
    method channels (line 128) | def channels(self, idx: Optional[Union[int, List[int]]] = None):
    method reduction (line 133) | def reduction(self, idx: Optional[Union[int, List[int]]] = None):
    method module_name (line 138) | def module_name(self, idx: Optional[Union[int, List[int]]] = None):
    method __getitem__ (line 143) | def __getitem__(self, item):
    method __len__ (line 146) | def __len__(self):
  class FeatureHooks (line 150) | class FeatureHooks:
    method __init__ (line 159) | def __init__(
    method _collect_output_hook (line 186) | def _collect_output_hook(self, hook_id, *args):
    method get_output (line 192) | def get_output(self, device) -> Dict[str, torch.tensor]:
  function _module_list (line 198) | def _module_list(module, flatten_sequential=False):
  function _get_feature_info (line 212) | def _get_feature_info(net, out_indices: OutIndicesT):
  function _get_return_layers (line 222) | def _get_return_layers(feature_info, out_map):
  class FeatureDictNet (line 230) | class FeatureDictNet(nn.ModuleDict):
    method __init__ (line 245) | def __init__(
    method set_grad_checkpointing (line 286) | def set_grad_checkpointing(self, enable: bool = True):
    method _collect (line 289) | def _collect(self, x) -> (Dict[str, torch.Tensor]):
    method forward (line 311) | def forward(self, x) -> Dict[str, torch.Tensor]:
  class FeatureListNet (line 315) | class FeatureListNet(FeatureDictNet):
    method __init__ (line 320) | def __init__(
    method forward (line 344) | def forward(self, x) -> (List[torch.Tensor]):
  class FeatureHookNet (line 348) | class FeatureHookNet(nn.ModuleDict):
    method __init__ (line 361) | def __init__(
    method set_grad_checkpointing (line 418) | def set_grad_checkpointing(self, enable: bool = True):
    method forward (line 421) | def forward(self, x):
  class FeatureGetterNet (line 435) | class FeatureGetterNet(nn.ModuleDict):
    method __init__ (line 441) | def __init__(
    method forward (line 475) | def forward(self, x):

FILE: timm/models/_features_fx.py
  class FeatureGraphNet (line 38) | class FeatureGraphNet(nn.Module):
    method __init__ (line 43) | def __init__(
    method forward (line 60) | def forward(self, x):
  class GraphExtractNet (line 67) | class GraphExtractNet(nn.Module):
    method __init__ (line 83) | def __init__(
    method forward (line 95) | def forward(self, x) -> Union[List[torch.Tensor], torch.Tensor]:

FILE: timm/models/_helpers.py
  function _checkpoint_unsafe_globals (line 31) | def _checkpoint_unsafe_globals(checkpoint_path: str) -> str:
  function _torch_load (line 41) | def _torch_load(
  function _remove_prefix (line 72) | def _remove_prefix(text: str, prefix: str) -> str:
  function clean_state_dict (line 79) | def clean_state_dict(state_dict: Dict[str, Any]) -> Dict[str, Any]:
  function load_state_dict (line 93) | def load_state_dict(
  function load_checkpoint (line 136) | def load_checkpoint(
  function remap_state_dict (line 178) | def remap_state_dict(
  function resume_checkpoint (line 207) | def resume_checkpoint(

FILE: timm/models/_hub.py
  function get_cache_dir (line 54) | def get_cache_dir(child_dir: str = ''):
  function download_cached_file (line 69) | def download_cached_file(
  function check_cached_file (line 95) | def check_cached_file(
  function has_hf_hub (line 121) | def has_hf_hub(necessary: bool = False):
  function hf_split (line 129) | def hf_split(hf_id: str):
  function load_cfg_from_json (line 138) | def load_cfg_from_json(json_file: Union[str, Path]):
  function download_from_hf (line 144) | def download_from_hf(
  function _parse_model_cfg (line 158) | def _parse_model_cfg(
  function load_model_config_from_hf (line 190) | def load_model_config_from_hf(
  function load_model_config_from_path (line 201) | def load_model_config_from_path(
  function load_state_dict_from_hf (line 214) | def load_state_dict_from_hf(
  function load_state_dict_from_path (line 265) | def load_state_dict_from_path(
  function load_custom_from_hf (line 296) | def load_custom_from_hf(
  function save_config_for_hf (line 313) | def save_config_for_hf(
  function save_for_hf (line 362) | def save_for_hf(
  function push_to_hf_hub (line 390) | def push_to_hf_hub(
  function generate_readme (line 451) | def generate_readme(
  function _get_safe_alternatives (line 520) | def _get_safe_alternatives(filename: str) -> Iterable[str]:
  function _get_license_from_hf_hub (line 535) | def _get_license_from_hf_hub(model_id: Optional[str], hf_hub_id: Optiona...

FILE: timm/models/_manipulate.py
  function model_parameters (line 21) | def model_parameters(model: nn.Module, exclude_head: bool = False):
  function named_apply (line 29) | def named_apply(
  function named_modules (line 45) | def named_modules(
  function named_modules_with_params (line 61) | def named_modules_with_params(
  function group_with_matcher (line 80) | def group_with_matcher(
  function group_parameters (line 141) | def group_parameters(
  function group_modules (line 151) | def group_modules(
  function flatten_modules (line 161) | def flatten_modules(
  function checkpoint (line 191) | def checkpoint(
  function checkpoint_seq (line 213) | def checkpoint_seq(
  function adapt_input_conv (line 289) | def adapt_input_conv(in_chans: int, conv_weight: Tensor) -> Tensor:
  function reinit_non_persistent_buffers (line 314) | def reinit_non_persistent_buffers(model: nn.Module) -> List[str]:

FILE: timm/models/_pretrained.py
  class PretrainedCfg (line 11) | class PretrainedCfg:
    method has_weights (line 58) | def has_weights(self):
    method to_dict (line 61) | def to_dict(self, remove_source=False, remove_null=True):
  function filter_pretrained_cfg (line 69) | def filter_pretrained_cfg(cfg, remove_source=False, remove_null=True):
  class DefaultCfg (line 82) | class DefaultCfg:
    method default (line 88) | def default(self):
    method default_with_tag (line 92) | def default_with_tag(self):

FILE: timm/models/_prune.py
  function extract_layer (line 12) | def extract_layer(model, layer):
  function set_layer (line 39) | def set_layer(model, layer, val):
  function adapt_model_from_string (line 70) | def adapt_model_from_string(parent_module, model_string):
  function adapt_model_from_file (line 165) | def adapt_model_from_file(parent_module, model_variant):

FILE: timm/models/_registry.py
  function split_model_name_tag (line 33) | def split_model_name_tag(model_name: str, no_tag: str = '') -> Tuple[str...
  function get_arch_name (line 39) | def get_arch_name(model_name: str) -> str:
  function generate_default_cfgs (line 43) | def generate_default_cfgs(cfgs: Dict[str, Union[Dict[str, Any], Pretrain...
  function register_model (line 75) | def register_model(fn: Callable[..., Any]) -> Callable[..., Any]:
  function _deprecated_model_shim (line 139) | def _deprecated_model_shim(deprecated_name: str, current_fn: Callable = ...
  function register_model_deprecations (line 149) | def register_model_deprecations(module_name: str, deprecation_map: Dict[...
  function _natural_key (line 171) | def _natural_key(string_: str) -> List[Union[int, str]]:
  function _expand_filter (line 176) | def _expand_filter(filter: str):
  function list_models (line 185) | def list_models(
  function list_pretrained (line 268) | def list_pretrained(
  function get_deprecated_models (line 280) | def get_deprecated_models(module: str = '') -> Dict[str, str]:
  function is_model (line 285) | def is_model(model_name: str) -> bool:
  function model_entrypoint (line 292) | def model_entrypoint(model_name: str, module_filter: Optional[str] = Non...
  function list_modules (line 301) | def list_modules() -> List[str]:
  function is_model_in_modules (line 308) | def is_model_in_modules(
  function is_model_pretrained (line 322) | def is_model_pretrained(model_name: str) -> bool:
  function get_pretrained_cfg (line 326) | def get_pretrained_cfg(model_name: str, allow_unregistered: bool = True)...
  function get_pretrained_cfg_value (line 339) | def get_pretrained_cfg_value(model_name: str, cfg_key: str) -> Optional[...
  function get_arch_pretrained_cfgs (line 346) | def get_arch_pretrained_cfgs(model_name: str) -> Dict[str, PretrainedCfg]:

FILE: timm/models/beit.py
  function gen_relative_position_index (line 73) | def gen_relative_position_index(window_size: Tuple[int, int], device=Non...
  class Attention (line 108) | class Attention(nn.Module):
    method __init__ (line 116) | def __init__(
    method _get_rel_pos_bias (line 185) | def _get_rel_pos_bias(self) -> torch.Tensor:
    method forward (line 198) | def forward(self, x: torch.Tensor, shared_rel_pos_bias: Optional[torch...
    method reset_parameters (line 254) | def reset_parameters(self) -> None:
    method _init_buffers (line 263) | def _init_buffers(self) -> None:
    method init_non_persistent_buffers (line 272) | def init_non_persistent_buffers(self) -> None:
  class Block (line 277) | class Block(nn.Module):
    method __init__ (line 285) | def __init__(
    method reset_parameters (line 368) | def reset_parameters(self) -> None:
    method forward (line 374) | def forward(self, x: torch.Tensor, shared_rel_pos_bias: Optional[torch...
  class RelativePositionBias (line 393) | class RelativePositionBias(nn.Module):
    method __init__ (line 400) | def __init__(self, window_size: Tuple[int, int], num_heads: int, devic...
    method reset_parameters (line 422) | def reset_parameters(self) -> None:
    method _init_buffers (line 427) | def _init_buffers(self) -> None:
    method init_non_persistent_buffers (line 433) | def init_non_persistent_buffers(self) -> None:
    method forward (line 437) | def forward(self) -> torch.Tensor:
  class Beit (line 448) | class Beit(nn.Module):
    method __init__ (line 456) | def __init__(
    method init_weights (line 575) | def init_weights(self, needs_reset: bool = True) -> None:
    method fix_init_weight (line 595) | def fix_init_weight(self) -> None:
    method _init_weights (line 607) | def _init_weights(self, m: nn.Module, needs_reset: bool = True):
    method no_weight_decay (line 622) | def no_weight_decay(self) -> Set[str]:
    method set_grad_checkpointing (line 635) | def set_grad_checkpointing(self, enable: bool = True):
    method group_matcher (line 644) | def group_matcher(self, coarse: bool = False) -> Dict[str, Any]:
    method get_classifier (line 660) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 668) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_intermediates (line 680) | def forward_intermediates(
    method prune_intermediate_layers (line 752) | def prune_intermediate_layers(
    method forward_features (line 777) | def forward_features(self, x: torch.Tensor) -> torch.Tensor:
    method forward_head (line 801) | def forward_head(self, x: torch.Tensor, pre_logits: bool = False) -> t...
    method forward (line 817) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _cfg (line 831) | def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
  function checkpoint_filter_fn (line 918) | def checkpoint_filter_fn(state_dict: Dict[str, torch.Tensor], model: nn....
  function _create_beit (line 974) | def _create_beit(variant: str, pretrained: bool = False, **kwargs) -> Beit:
  function beit_base_patch16_224 (line 996) | def beit_base_patch16_224(pretrained: bool = False, **kwargs) -> Beit:
  function beit_base_patch16_384 (line 1006) | def beit_base_patch16_384(pretrained: bool = False, **kwargs) -> Beit:
  function beit_large_patch16_224 (line 1016) | def beit_large_patch16_224(pretrained: bool = False, **kwargs) -> Beit:
  function beit_large_patch16_384 (line 1026) | def beit_large_patch16_384(pretrained: bool = False, **kwargs) -> Beit:
  function beit_large_patch16_512 (line 1036) | def beit_large_patch16_512(pretrained: bool = False, **kwargs) -> Beit:
  function beitv2_base_patch16_224 (line 1046) | def beitv2_base_patch16_224(pretrained: bool = False, **kwargs) -> Beit:
  function beitv2_large_patch16_224 (line 1056) | def beitv2_large_patch16_224(pretrained: bool = False, **kwargs) -> Beit:

FILE: timm/models/byoanet.py
  function _create_byoanet (line 265) | def _create_byoanet(variant: str, cfg_variant: Optional[str] = None, pre...
  function _cfg (line 285) | def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
  function botnet26t_256 (line 371) | def botnet26t_256(pretrained: bool = False, **kwargs) -> ByobNet:
  function sebotnet33ts_256 (line 379) | def sebotnet33ts_256(pretrained: bool = False, **kwargs) -> ByobNet:
  function botnet50ts_256 (line 386) | def botnet50ts_256(pretrained: bool = False, **kwargs) -> ByobNet:
  function eca_botnext26ts_256 (line 394) | def eca_botnext26ts_256(pretrained: bool = False, **kwargs) -> ByobNet:
  function halonet_h1 (line 402) | def halonet_h1(pretrained: bool = False, **kwargs) -> ByobNet:
  function halonet26t (line 410) | def halonet26t(pretrained: bool = False, **kwargs) -> ByobNet:
  function sehalonet33ts (line 417) | def sehalonet33ts(pretrained: bool = False, **kwargs) -> ByobNet:
  function halonet50ts (line 424) | def halonet50ts(pretrained: bool = False, **kwargs) -> ByobNet:
  function eca_halonext26ts (line 431) | def eca_halonext26ts(pretrained: bool = False, **kwargs) -> ByobNet:
  function lambda_resnet26t (line 438) | def lambda_resnet26t(pretrained: bool = False, **kwargs) -> ByobNet:
  function lambda_resnet50ts (line 445) | def lambda_resnet50ts(pretrained: bool = False, **kwargs) -> ByobNet:
  function lambda_resnet26rpt_256 (line 452) | def lambda_resnet26rpt_256(pretrained: bool = False, **kwargs) -> ByobNet:
  function haloregnetz_b (line 460) | def haloregnetz_b(pretrained: bool = False, **kwargs) -> ByobNet:
  function lamhalobotnet50ts_256 (line 467) | def lamhalobotnet50ts_256(pretrained: bool = False, **kwargs) -> ByobNet:
  function halo2botnet50ts_256 (line 474) | def halo2botnet50ts_256(pretrained: bool = False, **kwargs) -> ByobNet:

FILE: timm/models/byobnet.py
  class ByoBlockCfg (line 68) | class ByoBlockCfg:
  class ByoModelCfg (line 89) | class ByoModelCfg:
  function _rep_vgg_bcfg (line 122) | def _rep_vgg_bcfg(d: Tuple[int, ...] = (4, 6, 16, 1), wf: Tuple[float, ....
  function _mobileone_bcfg (line 142) | def _mobileone_bcfg(
  function interleave_blocks (line 179) | def interleave_blocks(
  function expand_blocks_cfg (line 211) | def expand_blocks_cfg(stage_blocks_cfg: Union[ByoBlockCfg, Sequence[ByoB...
  function num_groups (line 228) | def num_groups(group_size: Optional[int], channels: int) -> int:
  class LayerFn (line 247) | class LayerFn:
  class DownsampleAvg (line 256) | class DownsampleAvg(nn.Module):
    method __init__ (line 262) | def __init__(
    method forward (line 294) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function create_shortcut (line 306) | def create_shortcut(
  class BasicBlock (line 341) | class BasicBlock(nn.Module):
    method __init__ (line 345) | def __init__(
    method init_weights (line 396) | def init_weights(self, zero_init_last: bool = False):
    method forward (line 403) | def forward(self, x):
  class BottleneckBlock (line 415) | class BottleneckBlock(nn.Module):
    method __init__ (line 419) | def __init__(
    method init_weights (line 484) | def init_weights(self, zero_init_last: bool = False):
    method forward (line 491) | def forward(self, x):
  class DarkBlock (line 505) | class DarkBlock(nn.Module):
    method __init__ (line 516) | def __init__(
    method init_weights (line 568) | def init_weights(self, zero_init_last: bool = False):
    method forward (line 575) | def forward(self, x):
  class EdgeBlock (line 587) | class EdgeBlock(nn.Module):
    method __init__ (line 597) | def __init__(
    method init_weights (line 647) | def init_weights(self, zero_init_last: bool = False):
    method forward (line 654) | def forward(self, x):
  class RepVggBlock (line 666) | class RepVggBlock(nn.Module):
    method __init__ (line 672) | def __init__(
    method init_weights (line 734) | def init_weights(self, zero_init_last: bool = False):
    method forward (line 743) | def forward(self, x):
    method reparameterize (line 757) | def reparameterize(self):
    method _get_kernel_bias (line 790) | def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method _fuse_bn_tensor (line 816) | def _fuse_bn_tensor(self, branch) -> Tuple[torch.Tensor, torch.Tensor]:
  class MobileOneBlock (line 848) | class MobileOneBlock(nn.Module):
    method __init__ (line 858) | def __init__(
    method forward (line 933) | def forward(self, x: torch.Tensor) -> torch.Tensor:
    method reparameterize (line 959) | def reparameterize(self):
    method _get_kernel_bias (line 991) | def _get_kernel_bias(self) -> Tuple[torch.Tensor, torch.Tensor]:
    method _fuse_bn_tensor (line 1022) | def _fuse_bn_tensor(self, branch) -> Tuple[torch.Tensor, torch.Tensor]:
  class SelfAttnBlock (line 1054) | class SelfAttnBlock(nn.Module):
    method __init__ (line 1058) | def __init__(
    method init_weights (line 1119) | def init_weights(self, zero_init_last: bool = False):
    method forward (line 1125) | def forward(self, x):
  function register_block (line 1149) | def register_block(block_type: str, block_fn: nn.Module):
  function create_block (line 1153) | def create_block(block: Union[str, nn.Module], **kwargs):
  class Stem (line 1160) | class Stem(nn.Sequential):
    method __init__ (line 1162) | def __init__(
    method forward_intermediates (line 1232) | def forward_intermediates(self, x) -> Tuple[torch.Tensor, Optional[tor...
  function create_byob_stem (line 1241) | def create_byob_stem(
  function reduce_feat_size (line 1291) | def reduce_feat_size(feat_size, stride=2):
  function override_kwargs (line 1295) | def override_kwargs(block_kwargs, model_kwargs):
  function update_block_kwargs (line 1307) | def update_block_kwargs(block_kwargs: Dict[str, Any], block_cfg: ByoBloc...
  function drop_blocks (line 1343) | def drop_blocks(
  function create_byob_stages (line 1372) | def create_byob_stages(
  function get_layer_fns (line 1444) | def get_layer_fns(cfg: ByoModelCfg, allow_aa: bool = True):
  class ByobNet (line 1457) | class ByobNet(nn.Module):
    method __init__ (line 1466) | def __init__(
    method group_matcher (line 1616) | def group_matcher(self, coarse: bool = False) -> Dict[str, Any]:
    method set_grad_checkpointing (line 1635) | def set_grad_checkpointing(self, enable: bool = True) -> None:
    method get_classifier (line 1644) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 1652) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_intermediates (line 1662) | def forward_intermediates(
    method prune_intermediate_layers (line 1724) | def prune_intermediate_layers(
    method forward_features (line 1749) | def forward_features(self, x: torch.Tensor) -> torch.Tensor:
    method forward_head (line 1766) | def forward_head(self, x: torch.Tensor, pre_logits: bool = False) -> t...
    method forward (line 1778) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _init_weights (line 1792) | def _init_weights(module: nn.Module, name: str = '', zero_init_last: boo...
  function _convert_openai_clip (line 2390) | def _convert_openai_clip(
  function checkpoint_filter_fn (line 2433) | def checkpoint_filter_fn(
  function _create_byobnet (line 2442) | def _create_byobnet(variant: str, pretrained: bool = False, **kwargs) ->...
  function _cfg (line 2462) | def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
  function _cfgr (line 2482) | def _cfgr(url: str = '', **kwargs) -> Dict[str, Any]:
  function gernet_l (line 2799) | def gernet_l(pretrained=False, **kwargs) -> ByobNet:
  function gernet_m (line 2807) | def gernet_m(pretrained=False, **kwargs) -> ByobNet:
  function gernet_s (line 2815) | def gernet_s(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_a0 (line 2823) | def repvgg_a0(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_a1 (line 2831) | def repvgg_a1(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_a2 (line 2839) | def repvgg_a2(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_b0 (line 2847) | def repvgg_b0(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_b1 (line 2855) | def repvgg_b1(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_b1g4 (line 2863) | def repvgg_b1g4(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_b2 (line 2871) | def repvgg_b2(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_b2g4 (line 2879) | def repvgg_b2g4(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_b3 (line 2887) | def repvgg_b3(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_b3g4 (line 2895) | def repvgg_b3g4(pretrained=False, **kwargs) -> ByobNet:
  function repvgg_d2se (line 2903) | def repvgg_d2se(pretrained=False, **kwargs) -> ByobNet:
  function resnet51q (line 2911) | def resnet51q(pretrained=False, **kwargs) -> ByobNet:
  function resnet61q (line 2918) | def resnet61q(pretrained=False, **kwargs) -> ByobNet:
  function resnext26ts (line 2925) | def resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  function gcresnext26ts (line 2932) | def gcresnext26ts(pretrained=False, **kwargs) -> ByobNet:
  function seresnext26ts (line 2939) | def seresnext26ts(pretrained=False, **kwargs) -> ByobNet:
  function eca_resnext26ts (line 2946) | def eca_resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  function bat_resnext26ts (line 2953) | def bat_resnext26ts(pretrained=False, **kwargs) -> ByobNet:
  function resnet32ts (line 2960) | def resnet32ts(pretrained=False, **kwargs) -> ByobNet:
  function resnet33ts (line 2967) | def resnet33ts(pretrained=False, **kwargs) -> ByobNet:
  function gcresnet33ts (line 2974) | def gcresnet33ts(pretrained=False, **kwargs) -> ByobNet:
  function seresnet33ts (line 2981) | def seresnet33ts(pretrained=False, **kwargs) -> ByobNet:
  function eca_resnet33ts (line 2988) | def eca_resnet33ts(pretrained=False, **kwargs) -> ByobNet:
  function gcresnet50t (line 2995) | def gcresnet50t(pretrained=False, **kwargs) -> ByobNet:
  function gcresnext50ts (line 3002) | def gcresnext50ts(pretrained=False, **kwargs) -> ByobNet:
  function regnetz_b16 (line 3009) | def regnetz_b16(pretrained=False, **kwargs) -> ByobNet:
  function regnetz_c16 (line 3016) | def regnetz_c16(pretrained=False, **kwargs) -> ByobNet:
  function regnetz_d32 (line 3023) | def regnetz_d32(pretrained=False, **kwargs) -> ByobNet:
  function regnetz_d8 (line 3030) | def regnetz_d8(pretrained=False, **kwargs) -> ByobNet:
  function regnetz_e8 (line 3037) | def regnetz_e8(pretrained=False, **kwargs) -> ByobNet:
  function regnetz_b16_evos (line 3044) | def regnetz_b16_evos(pretrained=False, **kwargs) -> ByobNet:
  function regnetz_c16_evos (line 3051) | def regnetz_c16_evos(pretrained=False, **kwargs) -> ByobNet:
  function regnetz_d8_evos (line 3058) | def regnetz_d8_evos(pretrained=False, **kwargs) -> ByobNet:
  function mobileone_s0 (line 3065) | def mobileone_s0(pretrained=False, **kwargs) -> ByobNet:
  function mobileone_s1 (line 3072) | def mobileone_s1(pretrained=False, **kwargs) -> ByobNet:
  function mobileone_s2 (line 3079) | def mobileone_s2(pretrained=False, **kwargs) -> ByobNet:
  function mobileone_s3 (line 3086) | def mobileone_s3(pretrained=False, **kwargs) -> ByobNet:
  function mobileone_s4 (line 3093) | def mobileone_s4(pretrained=False, **kwargs) -> ByobNet:
  function resnet50_clip (line 3100) | def resnet50_clip(pretrained=False, **kwargs) -> ByobNet:
  function resnet101_clip (line 3107) | def resnet101_clip(pretrained=False, **kwargs) -> ByobNet:
  function resnet50x4_clip (line 3114) | def resnet50x4_clip(pretrained=False, **kwargs) -> ByobNet:
  function resnet50x16_clip (line 3121) | def resnet50x16_clip(pretrained=False, **kwargs) -> ByobNet:
  function resnet50x64_clip (line 3128) | def resnet50x64_clip(pretrained=False, **kwargs) -> ByobNet:
  function resnet50_clip_gap (line 3135) | def resnet50_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  function resnet101_clip_gap (line 3142) | def resnet101_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  function resnet50x4_clip_gap (line 3149) | def resnet50x4_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  function resnet50x16_clip_gap (line 3156) | def resnet50x16_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  function resnet50x64_clip_gap (line 3163) | def resnet50x64_clip_gap(pretrained=False, **kwargs) -> ByobNet:
  function resnet50_mlp (line 3170) | def resnet50_mlp(pretrained=False, **kwargs) -> ByobNet:
  function test_byobnet (line 3177) | def test_byobnet(pretrained=False, **kwargs) -> ByobNet:

FILE: timm/models/cait.py
  class ClassAttn (line 27) | class ClassAttn(nn.Module):
    method __init__ (line 32) | def __init__(
    method forward (line 56) | def forward(self, x):
  class LayerScaleBlockClassAttn (line 81) | class LayerScaleBlockClassAttn(nn.Module):
    method __init__ (line 84) | def __init__(
    method forward (line 125) | def forward(self, x, x_cls):
  class TalkingHeadAttn (line 132) | class TalkingHeadAttn(nn.Module):
    method __init__ (line 135) | def __init__(
    method forward (line 164) | def forward(self, x):
  class LayerScaleBlock (line 184) | class LayerScaleBlock(nn.Module):
    method __init__ (line 187) | def __init__(
    method forward (line 228) | def forward(self, x):
  class Cait (line 234) | class Cait(nn.Module):
    method __init__ (line 237) | def __init__(
    method _init_weights (line 333) | def _init_weights(self, m):
    method no_weight_decay (line 343) | def no_weight_decay(self):
    method set_grad_checkpointing (line 347) | def set_grad_checkpointing(self, enable=True):
    method group_matcher (line 351) | def group_matcher(self, coarse=False):
    method get_classifier (line 368) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 371) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_intermediates (line 378) | def forward_intermediates(
    method prune_intermediate_layers (line 439) | def prune_intermediate_layers(
    method forward_features (line 456) | def forward_features(self, x):
    method forward_head (line 471) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 477) | def forward(self, x):
  function checkpoint_filter_fn (line 483) | def checkpoint_filter_fn(state_dict, model=None):
  function _create_cait (line 492) | def _create_cait(variant, pretrained=False, **kwargs):
  function _cfg (line 505) | def _cfg(url='', **kwargs):
  function cait_xxs24_224 (line 566) | def cait_xxs24_224(pretrained=False, **kwargs) -> Cait:
  function cait_xxs24_384 (line 573) | def cait_xxs24_384(pretrained=False, **kwargs) -> Cait:
  function cait_xxs36_224 (line 580) | def cait_xxs36_224(pretrained=False, **kwargs) -> Cait:
  function cait_xxs36_384 (line 587) | def cait_xxs36_384(pretrained=False, **kwargs) -> Cait:
  function cait_xs24_384 (line 594) | def cait_xs24_384(pretrained=False, **kwargs) -> Cait:
  function cait_s24_224 (line 601) | def cait_s24_224(pretrained=False, **kwargs) -> Cait:
  function cait_s24_384 (line 608) | def cait_s24_384(pretrained=False, **kwargs) -> Cait:
  function cait_s36_384 (line 615) | def cait_s36_384(pretrained=False, **kwargs) -> Cait:
  function cait_m36_384 (line 622) | def cait_m36_384(pretrained=False, **kwargs) -> Cait:
  function cait_m48_448 (line 629) | def cait_m48_448(pretrained=False, **kwargs) -> Cait:

FILE: timm/models/coat.py
  class ConvRelPosEnc (line 24) | class ConvRelPosEnc(nn.Module):
    method __init__ (line 26) | def __init__(
    method forward (line 77) | def forward(self, q, v, size: Tuple[int, int]):
  class FactorAttnConvRelPosEnc (line 99) | class FactorAttnConvRelPosEnc(nn.Module):
    method __init__ (line 101) | def __init__(
    method forward (line 126) | def forward(self, x, size: Tuple[int, int]):
  class ConvPosEnc (line 152) | class ConvPosEnc(nn.Module):
    method __init__ (line 156) | def __init__(
    method forward (line 167) | def forward(self, x, size: Tuple[int, int]):
  class SerialBlock (line 186) | class SerialBlock(nn.Module):
    method __init__ (line 189) | def __init__(
    method forward (line 234) | def forward(self, x, size: Tuple[int, int]):
  class ParallelBlock (line 249) | class ParallelBlock(nn.Module):
    method __init__ (line 251) | def __init__(
    method upsample (line 320) | def upsample(self, x, factor: float, size: Tuple[int, int]):
    method downsample (line 324) | def downsample(self, x, factor: float, size: Tuple[int, int]):
    method interpolate (line 328) | def interpolate(self, x, scale_factor: float, size: Tuple[int, int]):
    method forward (line 351) | def forward(self, x1, x2, x3, x4, sizes: List[Tuple[int, int]]):
  class CoaT (line 386) | class CoaT(nn.Module):
    method __init__ (line 388) | def __init__(
    method _init_weights (line 563) | def _init_weights(self, m):
    method no_weight_decay (line 573) | def no_weight_decay(self):
    method set_grad_checkpointing (line 577) | def set_grad_checkpointing(self, enable=True):
    method group_matcher (line 581) | def group_matcher(self, coarse=False):
    method get_classifier (line 599) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 602) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_features (line 609) | def forward_features(self, x0):
    method forward_head (line 690) | def forward_head(self, x_feat: Union[torch.Tensor, List[torch.Tensor]]...
    method forward (line 703) | def forward(self, x) -> torch.Tensor:
  function insert_cls (line 714) | def insert_cls(x, cls_token):
  function remove_cls (line 721) | def remove_cls(x):
  function checkpoint_filter_fn (line 726) | def checkpoint_filter_fn(state_dict, model):
  function _create_coat (line 742) | def _create_coat(variant, pretrained=False, default_cfg=None, **kwargs):
  function _cfg_coat (line 756) | def _cfg_coat(url='', **kwargs):
  function coat_tiny (line 784) | def coat_tiny(pretrained=False, **kwargs) -> CoaT:
  function coat_mini (line 792) | def coat_mini(pretrained=False, **kwargs) -> CoaT:
  function coat_small (line 800) | def coat_small(pretrained=False, **kwargs) -> CoaT:
  function coat_lite_tiny (line 808) | def coat_lite_tiny(pretrained=False, **kwargs) -> CoaT:
  function coat_lite_mini (line 816) | def coat_lite_mini(pretrained=False, **kwargs) -> CoaT:
  function coat_lite_small (line 824) | def coat_lite_small(pretrained=False, **kwargs) -> CoaT:
  function coat_lite_medium (line 832) | def coat_lite_medium(pretrained=False, **kwargs) -> CoaT:
  function coat_lite_medium_384 (line 840) | def coat_lite_medium_384(pretrained=False, **kwargs) -> CoaT:

FILE: timm/models/convit.py
  class GPSA (line 40) | class GPSA(nn.Module):
    method __init__ (line 41) | def __init__(
    method forward (line 70) | def forward(self, x):
    method get_attention (line 81) | def get_attention(self, x):
    method get_attention_map (line 97) | def get_attention_map(self, x, return_map=False):
    method local_init (line 106) | def local_init(self):
    method get_rel_indices (line 120) | def get_rel_indices(self, num_patches: int) -> torch.Tensor:
  class MHSA (line 138) | class MHSA(nn.Module):
    method __init__ (line 139) | def __init__(
    method get_attention_map (line 160) | def get_attention_map(self, x, return_map=False):
    method forward (line 184) | def forward(self, x):
  class Block (line 199) | class Block(nn.Module):
    method __init__ (line 201) | def __init__(
    method forward (line 251) | def forward(self, x):
  class ConVit (line 257) | class ConVit(nn.Module):
    method __init__ (line 261) | def __init__(
    method _init_weights (line 352) | def _init_weights(self, m):
    method no_weight_decay (line 362) | def no_weight_decay(self):
    method group_matcher (line 366) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 373) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 377) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 380) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_features (line 387) | def forward_features(self, x):
    method forward_head (line 400) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 406) | def forward(self, x):
  function _create_convit (line 412) | def _create_convit(variant, pretrained=False, **kwargs):
  function _cfg (line 419) | def _cfg(url='', **kwargs):
  function convit_tiny (line 438) | def convit_tiny(pretrained=False, **kwargs) -> ConVit:
  function convit_small (line 446) | def convit_small(pretrained=False, **kwargs) -> ConVit:
  function convit_base (line 454) | def convit_base(pretrained=False, **kwargs) -> ConVit:

FILE: timm/models/convmixer.py
  class Residual (line 18) | class Residual(nn.Module):
    method __init__ (line 19) | def __init__(self, fn: nn.Module):
    method forward (line 23) | def forward(self, x):
  class ConvMixer (line 27) | class ConvMixer(nn.Module):
    method __init__ (line 28) | def __init__(
    method group_matcher (line 72) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 77) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 81) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 84) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_features (line 90) | def forward_features(self, x):
    method forward_head (line 98) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 103) | def forward(self, x):
  function _create_convmixer (line 109) | def _create_convmixer(variant, pretrained=False, **kwargs):
  function _cfg (line 116) | def _cfg(url='', **kwargs):
  function convmixer_1536_20 (line 136) | def convmixer_1536_20(pretrained=False, **kwargs) -> ConvMixer:
  function convmixer_768_32 (line 142) | def convmixer_768_32(pretrained=False, **kwargs) -> ConvMixer:
  function convmixer_1024_20_ks9_p14 (line 148) | def convmixer_1024_20_ks9_p14(pretrained=False, **kwargs) -> ConvMixer:

FILE: timm/models/convnext.py
  class Downsample (line 76) | class Downsample(nn.Module):
    method __init__ (line 79) | def __init__(
    method forward (line 110) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class ConvNeXtBlock (line 117) | class ConvNeXtBlock(nn.Module):
    method __init__ (line 129) | def __init__(
    method forward (line 197) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class ConvNeXtStage (line 216) | class ConvNeXtStage(nn.Module):
    method __init__ (line 219) | def __init__(
    method forward (line 300) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _get_norm_layers (line 320) | def _get_norm_layers(norm_layer: Union[Callable, str], conv_mlp: bool, n...
  class ConvNeXt (line 338) | class ConvNeXt(nn.Module):
    method __init__ (line 344) | def __init__(
    method group_matcher (line 489) | def group_matcher(self, coarse: bool = False) -> Dict[str, Union[str, ...
    method set_grad_checkpointing (line 508) | def set_grad_checkpointing(self, enable: bool = True) -> None:
    method get_classifier (line 518) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 522) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_intermediates (line 532) | def forward_intermediates(
    method prune_intermediate_layers (line 582) | def prune_intermediate_layers(
    method forward_features (line 606) | def forward_features(self, x: torch.Tensor) -> torch.Tensor:
    method forward_head (line 613) | def forward_head(self, x: torch.Tensor, pre_logits: bool = False) -> t...
    method forward (line 625) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _init_weights (line 632) | def _init_weights(module: nn.Module, name: Optional[str] = None, head_in...
  function checkpoint_filter_fn (line 652) | def checkpoint_filter_fn(state_dict, model):
  function _create_convnext (line 694) | def _create_convnext(variant, pretrained=False, **kwargs):
  function _cfg (line 708) | def _cfg(url='', **kwargs):
  function _cfgv2 (line 719) | def _cfgv2(url='', **kwargs):
  function convnext_zepto_rms (line 1155) | def convnext_zepto_rms(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_zepto_rms_ols (line 1163) | def convnext_zepto_rms_ols(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_atto (line 1172) | def convnext_atto(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_atto_ols (line 1180) | def convnext_atto_ols(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_atto_rms (line 1188) | def convnext_atto_rms(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_femto (line 1196) | def convnext_femto(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_femto_ols (line 1204) | def convnext_femto_ols(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_pico (line 1212) | def convnext_pico(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_pico_ols (line 1220) | def convnext_pico_ols(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_nano (line 1228) | def convnext_nano(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_nano_ols (line 1236) | def convnext_nano_ols(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_tiny_hnf (line 1244) | def convnext_tiny_hnf(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_tiny (line 1252) | def convnext_tiny(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_small (line 1259) | def convnext_small(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_base (line 1266) | def convnext_base(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_large (line 1273) | def convnext_large(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_large_mlp (line 1280) | def convnext_large_mlp(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_xlarge (line 1287) | def convnext_xlarge(pretrained=False, **kwargs) -> ConvNeXt:
  function convnext_xxlarge (line 1294) | def convnext_xxlarge(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_atto (line 1301) | def convnextv2_atto(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_femto (line 1310) | def convnextv2_femto(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_pico (line 1319) | def convnextv2_pico(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_nano (line 1328) | def convnextv2_nano(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_tiny (line 1337) | def convnextv2_tiny(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_small (line 1344) | def convnextv2_small(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_base (line 1351) | def convnextv2_base(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_large (line 1358) | def convnextv2_large(pretrained=False, **kwargs) -> ConvNeXt:
  function convnextv2_huge (line 1365) | def convnextv2_huge(pretrained=False, **kwargs) -> ConvNeXt:
  function test_convnext (line 1372) | def test_convnext(pretrained=False, **kwargs) -> ConvNeXt:
  function test_convnext2 (line 1379) | def test_convnext2(pretrained=False, **kwargs) -> ConvNeXt:
  function test_convnext3 (line 1386) | def test_convnext3(pretrained=False, **kwargs) -> ConvNeXt:

FILE: timm/models/crossvit.py
  class PatchEmbed (line 39) | class PatchEmbed(nn.Module):
    method __init__ (line 43) | def __init__(
    method forward (line 81) | def forward(self, x):
  class CrossAttention (line 92) | class CrossAttention(nn.Module):
    method __init__ (line 93) | def __init__(
    method forward (line 117) | def forward(self, x):
  class CrossAttentionBlock (line 136) | class CrossAttentionBlock(nn.Module):
    method __init__ (line 138) | def __init__(
    method forward (line 166) | def forward(self, x):
  class MultiScaleBlock (line 171) | class MultiScaleBlock(nn.Module):
    method __init__ (line 173) | def __init__(
    method forward (line 265) | def forward(self, x: List[torch.Tensor]) -> List[torch.Tensor]:
  function _compute_num_patches (line 287) | def _compute_num_patches(img_size, patches):
  function scale_image (line 292) | def scale_image(x, ss: Tuple[int, int], crop_scale: bool = False):  # an...
  class CrossVit (line 312) | class CrossVit(nn.Module):
    method __init__ (line 316) | def __init__(
    method _init_weights (line 410) | def _init_weights(self, m):
    method no_weight_decay (line 420) | def no_weight_decay(self):
    method group_matcher (line 430) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 437) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 441) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 444) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_features (line 457) | def forward_features(self, x) -> List[torch.Tensor]:
    method forward_head (line 480) | def forward_head(self, xs: List[torch.Tensor], pre_logits: bool = Fals...
    method forward (line 487) | def forward(self, x):
  function _create_crossvit (line 493) | def _create_crossvit(variant, pretrained=False, **kwargs):
  function _cfg (line 516) | def _cfg(url='', **kwargs):
  function crossvit_tiny_240 (line 559) | def crossvit_tiny_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_small_240 (line 568) | def crossvit_small_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_base_240 (line 577) | def crossvit_base_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_9_240 (line 586) | def crossvit_9_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_15_240 (line 595) | def crossvit_15_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_18_240 (line 604) | def crossvit_18_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_9_dagger_240 (line 613) | def crossvit_9_dagger_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_15_dagger_240 (line 622) | def crossvit_15_dagger_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_15_dagger_408 (line 631) | def crossvit_15_dagger_408(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_18_dagger_240 (line 640) | def crossvit_18_dagger_240(pretrained=False, **kwargs) -> CrossVit:
  function crossvit_18_dagger_408 (line 649) | def crossvit_18_dagger_408(pretrained=False, **kwargs) -> CrossVit:

FILE: timm/models/csatv2.py
  function _zigzag_permutation (line 88) | def _zigzag_permutation(rows: int, cols: int) -> List[int]:
  function _dct_kernel_type_2 (line 105) | def _dct_kernel_type_2(
  function _dct_kernel_type_3 (line 131) | def _dct_kernel_type_3(
  class Dct1d (line 141) | class Dct1d(nn.Module):
    method __init__ (line 144) | def __init__(
    method forward (line 159) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Dct2d (line 163) | class Dct2d(nn.Module):
    method __init__ (line 166) | def __init__(
    method forward (line 178) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _split_out_chs (line 182) | def _split_out_chs(out_chs: int, ratio=(24, 4, 4)):
  class LearnableDct2d (line 199) | class LearnableDct2d(nn.Module):
    method __init__ (line 202) | def __init__(
    method reset_parameters (line 232) | def reset_parameters(self) -> None:
    method _init_buffers (line 236) | def _init_buffers(self) -> None:
    method init_non_persistent_buffers (line 243) | def init_non_persistent_buffers(self) -> None:
    method _denormalize (line 247) | def _denormalize(self, x: torch.Tensor) -> torch.Tensor:
    method _rgb_to_ycbcr (line 251) | def _rgb_to_ycbcr(self, x: torch.Tensor) -> torch.Tensor:
    method _frequency_normalize (line 259) | def _frequency_normalize(self, x: torch.Tensor) -> torch.Tensor:
    method forward (line 264) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class Dct2dStats (line 283) | class Dct2dStats(nn.Module):
    method __init__ (line 286) | def __init__(
    method forward (line 300) | def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
  class Block (line 318) | class Block(nn.Module):
    method __init__ (line 321) | def __init__(
    method forward (line 341) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SpatialTransformerBlock (line 360) | class SpatialTransformerBlock(nn.Module):
    method __init__ (line 367) | def __init__(
    method forward (line 383) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class SpatialAttention (line 411) | class SpatialAttention(nn.Module):
    method __init__ (line 414) | def __init__(
    method forward (line 425) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class TransformerBlock (line 435) | class TransformerBlock(nn.Module):
    method __init__ (line 438) | def __init__(
    method forward (line 485) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class PosConv (line 517) | class PosConv(nn.Module):
    method __init__ (line 520) | def __init__(
    method forward (line 530) | def forward(self, x: torch.Tensor, size: Tuple[int, int]) -> torch.Ten...
  class CSATv2 (line 538) | class CSATv2(nn.Module):
    method __init__ (line 545) | def __init__(
    method init_weights (line 615) | def init_weights(self, needs_reset: bool = True):
    method _init_weights (line 618) | def _init_weights(self, m: nn.Module, needs_reset: bool = True) -> None:
    method get_classifier (line 627) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 630) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method set_grad_checkpointing (line 637) | def set_grad_checkpointing(self, enable: bool = True) -> None:
    method forward_features (line 640) | def forward_features(self, x: torch.Tensor) -> torch.Tensor:
    method forward_intermediates (line 648) | def forward_intermediates(
    method prune_intermediate_layers (line 698) | def prune_intermediate_layers(
    method forward_head (line 726) | def forward_head(self, x: torch.Tensor, pre_logits: bool = False) -> t...
    method forward (line 729) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _cfg (line 734) | def _cfg(url='', **kwargs):
  function checkpoint_filter_fn (line 762) | def checkpoint_filter_fn(state_dict: dict, model: nn.Module) -> dict:
  function _create_csatv2 (line 844) | def _create_csatv2(variant: str, pretrained: bool = False, **kwargs) -> ...
  function csatv2 (line 858) | def csatv2(pretrained: bool = False, **kwargs) -> CSATv2:
  function csatv2_21m (line 863) | def csatv2_21m(pretrained: bool = False, **kwargs) -> CSATv2:

FILE: timm/models/cspnet.py
  class CspStemCfg (line 32) | class CspStemCfg:
  function _pad_arg (line 40) | def _pad_arg(x, n):
  class CspStagesCfg (line 52) | class CspStagesCfg:
    method __post_init__ (line 70) | def __post_init__(self):
  class CspModelCfg (line 89) | class CspModelCfg:
  function _cs3_cfg (line 98) | def _cs3_cfg(
  class BottleneckBlock (line 135) | class BottleneckBlock(nn.Module):
    method __init__ (line 139) | def __init__(
    method zero_init_last (line 179) | def zero_init_last(self):
    method forward (line 182) | def forward(self, x):
  class DarkBlock (line 196) | class DarkBlock(nn.Module):
    method __init__ (line 200) | def __init__(
    method zero_init_last (line 234) | def zero_init_last(self):
    method forward (line 237) | def forward(self, x):
  class EdgeBlock (line 246) | class EdgeBlock(nn.Module):
    method __init__ (line 250) | def __init__(
    method zero_init_last (line 284) | def zero_init_last(self):
    method forward (line 287) | def forward(self, x):
  class CrossStage (line 296) | class CrossStage(nn.Module):
    method __init__ (line 298) | def __init__(
    method forward (line 382) | def forward(self, x):
  class CrossStage3 (line 392) | class CrossStage3(nn.Module):
    method __init__ (line 396) | def __init__(
    method forward (line 477) | def forward(self, x):
  class DarkStage (line 486) | class DarkStage(nn.Module):
    method __init__ (line 489) | def __init__(
    method forward (line 547) | def forward(self, x):
  function create_csp_stem (line 553) | def create_csp_stem(
  function _get_stage_fn (line 611) | def _get_stage_fn(stage_args):
  function _get_block_fn (line 626) | def _get_block_fn(stage_args):
  function _get_attn_fn (line 637) | def _get_attn_fn(stage_args):
  function create_csp_stages (line 647) | def create_csp_stages(
  class CspNet (line 704) | class CspNet(nn.Module):
    method __init__ (line 714) | def __init__(
    method group_matcher (line 783) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 795) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 799) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 802) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_features (line 806) | def forward_features(self, x):
    method forward_head (line 811) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 814) | def forward(self, x):
  function _init_weights (line 820) | def _init_weights(module, name, zero_init_last=False):
  function _create_cspnet (line 994) | def _create_cspnet(variant, pretrained=False, **kwargs):
  function _cfg (line 1008) | def _cfg(url='', **kwargs):
  function cspresnet50 (line 1097) | def cspresnet50(pretrained=False, **kwargs) -> CspNet:
  function cspresnet50d (line 1102) | def cspresnet50d(pretrained=False, **kwargs) -> CspNet:
  function cspresnet50w (line 1107) | def cspresnet50w(pretrained=False, **kwargs) -> CspNet:
  function cspresnext50 (line 1112) | def cspresnext50(pretrained=False, **kwargs) -> CspNet:
  function cspdarknet53 (line 1117) | def cspdarknet53(pretrained=False, **kwargs) -> CspNet:
  function darknet17 (line 1122) | def darknet17(pretrained=False, **kwargs) -> CspNet:
  function darknet21 (line 1127) | def darknet21(pretrained=False, **kwargs) -> CspNet:
  function sedarknet21 (line 1132) | def sedarknet21(pretrained=False, **kwargs) -> CspNet:
  function darknet53 (line 1137) | def darknet53(pretrained=False, **kwargs) -> CspNet:
  function darknetaa53 (line 1142) | def darknetaa53(pretrained=False, **kwargs) -> CspNet:
  function cs3darknet_s (line 1147) | def cs3darknet_s(pretrained=False, **kwargs) -> CspNet:
  function cs3darknet_m (line 1152) | def cs3darknet_m(pretrained=False, **kwargs) -> CspNet:
  function cs3darknet_l (line 1157) | def cs3darknet_l(pretrained=False, **kwargs) -> CspNet:
  function cs3darknet_x (line 1162) | def cs3darknet_x(pretrained=False, **kwargs) -> CspNet:
  function cs3darknet_focus_s (line 1167) | def cs3darknet_focus_s(pretrained=False, **kwargs) -> CspNet:
  function cs3darknet_focus_m (line 1172) | def cs3darknet_focus_m(pretrained=False, **kwargs) -> CspNet:
  function cs3darknet_focus_l (line 1177) | def cs3darknet_focus_l(pretrained=False, **kwargs) -> CspNet:
  function cs3darknet_focus_x (line 1182) | def cs3darknet_focus_x(pretrained=False, **kwargs) -> CspNet:
  function cs3sedarknet_l (line 1187) | def cs3sedarknet_l(pretrained=False, **kwargs) -> CspNet:
  function cs3sedarknet_x (line 1192) | def cs3sedarknet_x(pretrained=False, **kwargs) -> CspNet:
  function cs3sedarknet_xdw (line 1197) | def cs3sedarknet_xdw(pretrained=False, **kwargs) -> CspNet:
  function cs3edgenet_x (line 1202) | def cs3edgenet_x(pretrained=False, **kwargs) -> CspNet:
  function cs3se_edgenet_x (line 1207) | def cs3se_edgenet_x(pretrained=False, **kwargs) -> CspNet:

FILE: timm/models/davit.py
  class ConvPosEnc (line 34) | class ConvPosEnc(nn.Module):
    method __init__ (line 35) | def __init__(
    method forward (line 57) | def forward(self, x: Tensor):
  class Stem (line 63) | class Stem(nn.Module):
    method __init__ (line 68) | def __init__(
    method forward (line 94) | def forward(self, x: Tensor):
  class Downsample (line 104) | class Downsample(nn.Module):
    method __init__ (line 105) | def __init__(
    method forward (line 130) | def forward(self, x: Tensor):
  class ChannelAttentionV2 (line 142) | class ChannelAttentionV2(nn.Module):
    method __init__ (line 144) | def __init__(
    method forward (line 162) | def forward(self, x):
  class ChannelAttention (line 182) | class ChannelAttention(nn.Module):
    method __init__ (line 184) | def __init__(
    method forward (line 201) | def forward(self, x: Tensor):
  class ChannelBlock (line 216) | class ChannelBlock(nn.Module):
    method __init__ (line 218) | def __init__(
    method forward (line 263) | def forward(self, x: Tensor):
  function window_partition (line 282) | def window_partition(x: Tensor, window_size: Tuple[int, int]):
  function window_reverse (line 297) | def window_reverse(windows: Tensor, window_size: Tuple[int, int], H: int...
  class WindowAttention (line 313) | class WindowAttention(nn.Module):
    method __init__ (line 324) | def __init__(
    method forward (line 347) | def forward(self, x: Tensor):
  class SpatialBlock (line 366) | class SpatialBlock(nn.Module):
    method __init__ (line 379) | def __init__(
    method forward (line 429) | def forward(self, x: Tensor):
  class DaVitStage (line 469) | class DaVitStage(nn.Module):
    method __init__ (line 470) | def __init__(
    method set_grad_checkpointing (line 548) | def set_grad_checkpointing(self, enable=True):
    method forward (line 551) | def forward(self, x: Tensor):
  class DaVit (line 560) | class DaVit(nn.Module):
    method __init__ (line 578) | def __init__(
    method _init_weights (line 674) | def _init_weights(self, m):
    method group_matcher (line 681) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 692) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 698) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 701) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_intermediates (line 705) | def forward_intermediates(
    method prune_intermediate_layers (line 758) | def prune_intermediate_layers(
    method forward_features (line 774) | def forward_features(self, x):
    method forward_head (line 783) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 786) | def forward(self, x):
  function _convert_florence2 (line 792) | def _convert_florence2(state_dict, model, prefix='vision_tower.'):
  function checkpoint_filter_fn (line 820) | def checkpoint_filter_fn(state_dict, model):
  function _create_davit (line 846) | def _create_davit(variant, pretrained=False, **kwargs):
  function _cfg (line 867) | def _cfg(url='', **kwargs):
  function davit_tiny (line 901) | def davit_tiny(pretrained=False, **kwargs) -> DaVit:
  function davit_small (line 907) | def davit_small(pretrained=False, **kwargs) -> DaVit:
  function davit_base (line 913) | def davit_base(pretrained=False, **kwargs) -> DaVit:
  function davit_large (line 919) | def davit_large(pretrained=False, **kwargs) -> DaVit:
  function davit_huge (line 925) | def davit_huge(pretrained=False, **kwargs) -> DaVit:
  function davit_giant (line 931) | def davit_giant(pretrained=False, **kwargs) -> DaVit:
  function davit_base_fl (line 938) | def davit_base_fl(pretrained=False, **kwargs) -> DaVit:
  function davit_huge_fl (line 947) | def davit_huge_fl(pretrained=False, **kwargs) -> DaVit:

FILE: timm/models/deit.py
  class VisionTransformerDistilled (line 28) | class VisionTransformerDistilled(VisionTransformer):
    method __init__ (line 35) | def __init__(self, *args, **kwargs):
    method init_weights (line 53) | def init_weights(self, mode='', needs_reset=True):
    method group_matcher (line 59) | def group_matcher(self, coarse=False):
    method get_classifier (line 68) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 71) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method set_distilled_training (line 77) | def set_distilled_training(self, enable=True):
    method _pos_embed (line 80) | def _pos_embed(self, x):
    method forward_head (line 113) | def forward_head(self, x, pre_logits: bool = False) -> torch.Tensor:
  function _create_deit (line 127) | def _create_deit(variant, pretrained=False, distilled=False, **kwargs):
  function _cfg (line 141) | def _cfg(url='', **kwargs):
  function deit_tiny_patch16_224 (line 251) | def deit_tiny_patch16_224(pretrained=False, **kwargs) -> VisionTransformer:
  function deit_small_patch16_224 (line 261) | def deit_small_patch16_224(pretrained=False, **kwargs) -> VisionTransfor...
  function deit_base_patch16_224 (line 271) | def deit_base_patch16_224(pretrained=False, **kwargs) -> VisionTransformer:
  function deit_base_patch16_384 (line 281) | def deit_base_patch16_384(pretrained=False, **kwargs) -> VisionTransformer:
  function deit_tiny_distilled_patch16_224 (line 291) | def deit_tiny_distilled_patch16_224(pretrained=False, **kwargs) -> Visio...
  function deit_small_distilled_patch16_224 (line 302) | def deit_small_distilled_patch16_224(pretrained=False, **kwargs) -> Visi...
  function deit_base_distilled_patch16_224 (line 313) | def deit_base_distilled_patch16_224(pretrained=False, **kwargs) -> Visio...
  function deit_base_distilled_patch16_384 (line 324) | def deit_base_distilled_patch16_384(pretrained=False, **kwargs) -> Visio...
  function deit3_small_patch16_224 (line 335) | def deit3_small_patch16_224(pretrained=False, **kwargs) -> VisionTransfo...
  function deit3_small_patch16_384 (line 345) | def deit3_small_patch16_384(pretrained=False, **kwargs) -> VisionTransfo...
  function deit3_medium_patch16_224 (line 355) | def deit3_medium_patch16_224(pretrained=False, **kwargs) -> VisionTransf...
  function deit3_base_patch16_224 (line 365) | def deit3_base_patch16_224(pretrained=False, **kwargs) -> VisionTransfor...
  function deit3_base_patch16_384 (line 375) | def deit3_base_patch16_384(pretrained=False, **kwargs) -> VisionTransfor...
  function deit3_large_patch16_224 (line 385) | def deit3_large_patch16_224(pretrained=False, **kwargs) -> VisionTransfo...
  function deit3_large_patch16_384 (line 395) | def deit3_large_patch16_384(pretrained=False, **kwargs) -> VisionTransfo...
  function deit3_huge_patch14_224 (line 405) | def deit3_huge_patch14_224(pretrained=False, **kwargs) -> VisionTransfor...

FILE: timm/models/densenet.py
  class DenseLayer (line 23) | class DenseLayer(nn.Module):
    method __init__ (line 29) | def __init__(
    method bottleneck_fn (line 61) | def bottleneck_fn(self, xs: List[torch.Tensor]) -> torch.Tensor:
    method any_requires_grad (line 68) | def any_requires_grad(self, x: List[torch.Tensor]) -> bool:
    method call_checkpoint_bottleneck (line 75) | def call_checkpoint_bottleneck(self, x: List[torch.Tensor]) -> torch.T...
    method forward (line 84) | def forward(self, x: Union[torch.Tensor, List[torch.Tensor]]) -> torch...
  class DenseBlock (line 111) | class DenseBlock(nn.ModuleDict):
    method __init__ (line 118) | def __init__(
    method forward (line 155) | def forward(self, init_features: torch.Tensor) -> torch.Tensor:
  class DenseTransition (line 171) | class DenseTransition(nn.Sequential):
    method __init__ (line 177) | def __init__(
  class DenseNet (line 205) | class DenseNet(nn.Module):
    method __init__ (line 222) | def __init__(
    method group_matcher (line 357) | def group_matcher(self, coarse: bool = False) -> Dict[str, Any]:
    method set_grad_checkpointing (line 369) | def set_grad_checkpointing(self, enable: bool = True) -> None:
    method get_classifier (line 376) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 380) | def reset_classifier(self, num_classes: int, global_pool: str = 'avg')...
    method forward_features (line 391) | def forward_features(self, x: torch.Tensor) -> torch.Tensor:
    method forward_head (line 395) | def forward_head(self, x: torch.Tensor, pre_logits: bool = False) -> t...
    method forward (line 409) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  function _filter_torchvision_pretrained (line 423) | def _filter_torchvision_pretrained(state_dict: dict) -> Dict[str, torch....
  function _create_densenet (line 444) | def _create_densenet(
  function _cfg (line 475) | def _cfg(url: str = '', **kwargs) -> Dict[str, Any]:
  function densenet121 (line 502) | def densenet121(pretrained=False, **kwargs) -> DenseNet:
  function densenetblur121d (line 512) | def densenetblur121d(pretrained=False, **kwargs) -> DenseNet:
  function densenet169 (line 522) | def densenet169(pretrained=False, **kwargs) -> DenseNet:
  function densenet201 (line 532) | def densenet201(pretrained=False, **kwargs) -> DenseNet:
  function densenet161 (line 542) | def densenet161(pretrained=False, **kwargs) -> DenseNet:
  function densenet264d (line 552) | def densenet264d(pretrained=False, **kwargs) -> DenseNet:

FILE: timm/models/dla.py
  class DlaBasic (line 22) | class DlaBasic(nn.Module):
    method __init__ (line 25) | def __init__(
    method forward (line 62) | def forward(self, x, shortcut: Optional[torch.Tensor] = None, children...
  class DlaBottleneck (line 79) | class DlaBottleneck(nn.Module):
    method __init__ (line 83) | def __init__(
    method forward (line 118) | def forward(self, x, shortcut: Optional[torch.Tensor] = None, children...
  class DlaBottle2neck (line 139) | class DlaBottle2neck(nn.Module):
    method __init__ (line 145) | def __init__(
    method forward (line 192) | def forward(self, x, shortcut: Optional[torch.Tensor] = None, children...
  class DlaRoot (line 228) | class DlaRoot(nn.Module):
    method __init__ (line 229) | def __init__(
    method forward (line 253) | def forward(self, x_children: List[torch.Tensor]):
  class DlaTree (line 263) | class DlaTree(nn.Module):
    method __init__ (line 264) | def __init__(
    method forward (line 325) | def forward(self, x, shortcut: Optional[torch.Tensor] = None, children...
  class DLA (line 342) | class DLA(nn.Module):
    method __init__ (line 343) | def __init__(
    method _make_conv_level (line 408) | def _make_conv_level(self, inplanes: int, planes: int, convs: int, str...
    method group_matcher (line 429) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 442) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 446) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 449) | def reset_classifier(self, num_classes: int, global_pool: str = 'avg'):
    method forward_features (line 455) | def forward_features(self, x):
    method forward_head (line 465) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 473) | def forward(self, x):
  function _create_dla (line 479) | def _create_dla(variant, pretrained=False, **kwargs):
  function _cfg (line 490) | def _cfg(url='', **kwargs):
  function dla60_res2net (line 518) | def dla60_res2net(pretrained=False, **kwargs) -> DLA:
  function dla60_res2next (line 526) | def dla60_res2next(pretrained=False,**kwargs):
  function dla34 (line 534) | def dla34(pretrained=False, **kwargs) -> DLA:  # DLA-34
  function dla46_c (line 541) | def dla46_c(pretrained=False, **kwargs) -> DLA:  # DLA-46-C
  function dla46x_c (line 548) | def dla46x_c(pretrained=False, **kwargs) -> DLA:  # DLA-X-46-C
  function dla60x_c (line 556) | def dla60x_c(pretrained=False, **kwargs) -> DLA:  # DLA-X-60-C
  function dla60 (line 564) | def dla60(pretrained=False, **kwargs) -> DLA:  # DLA-60
  function dla60x (line 572) | def dla60x(pretrained=False, **kwargs) -> DLA:  # DLA-X-60
  function dla102 (line 580) | def dla102(pretrained=False, **kwargs) -> DLA:  # DLA-102
  function dla102x (line 588) | def dla102x(pretrained=False, **kwargs) -> DLA:  # DLA-X-102
  function dla102x2 (line 596) | def dla102x2(pretrained=False, **kwargs) -> DLA:  # DLA-X-102 64
  function dla169 (line 604) | def dla169(pretrained=False, **kwargs) -> DLA:  # DLA-169

FILE: timm/models/dpn.py
  class CatBnAct (line 25) | class CatBnAct(nn.Module):
    method __init__ (line 26) | def __init__(
    method forward (line 37) | def forward(self, x):
  class BnActConv2d (line 43) | class BnActConv2d(nn.Module):
    method __init__ (line 44) | def __init__(
    method forward (line 60) | def forward(self, x):
  class DualPathBlock (line 64) | class DualPathBlock(nn.Module):
    method __init__ (line 65) | def __init__(
    method forward (line 117) | def forward(self, x) -> Tuple[torch.Tensor, torch.Tensor]:
  class DPN (line 151) | class DPN(nn.Module):
    method __init__ (line 152) | def __init__(
    method group_matcher (line 257) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 268) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 272) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 275) | def reset_classifier(self, num_classes: int, global_pool: str = 'avg'):
    method forward_features (line 281) | def forward_features(self, x):
    method forward_head (line 284) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 293) | def forward(self, x):
  function _create_dpn (line 299) | def _create_dpn(variant, pretrained=False, **kwargs):
  function _cfg (line 309) | def _cfg(url='', **kwargs):
  function dpn48b (line 335) | def dpn48b(pretrained=False, **kwargs) -> DPN:
  function dpn68 (line 343) | def dpn68(pretrained=False, **kwargs) -> DPN:
  function dpn68b (line 351) | def dpn68b(pretrained=False, **kwargs) -> DPN:
  function dpn92 (line 359) | def dpn92(pretrained=False, **kwargs) -> DPN:
  function dpn98 (line 367) | def dpn98(pretrained=False, **kwargs) -> DPN:
  function dpn131 (line 375) | def dpn131(pretrained=False, **kwargs) -> DPN:
  function dpn107 (line 383) | def dpn107(pretrained=False, **kwargs) -> DPN:

FILE: timm/models/edgenext.py
  class PositionalEncodingFourier (line 40) | class PositionalEncodingFourier(nn.Module):
    method __init__ (line 41) | def __init__(
    method forward (line 57) | def forward(self, shape: Tuple[int, int, int]):
  class ConvBlock (line 84) | class ConvBlock(nn.Module):
    method __init__ (line 85) | def __init__(
    method forward (line 124) | def forward(self, x):
  class CrossCovarianceAttn (line 141) | class CrossCovarianceAttn(nn.Module):
    method __init__ (line 142) | def __init__(
    method forward (line 162) | def forward(self, x):
    method no_weight_decay (line 179) | def no_weight_decay(self):
  class SplitTransposeBlock (line 183) | class SplitTransposeBlock(nn.Module):
    method __init__ (line 184) | def __init__(
    method forward (line 237) | def forward(self, x):
  class EdgeNeXtStage (line 273) | class EdgeNeXtStage(nn.Module):
    method __init__ (line 274) | def __init__(
    method forward (line 346) | def forward(self, x):
  class EdgeNeXt (line 355) | class EdgeNeXt(nn.Module):
    method __init__ (line 356) | def __init__(
    method group_matcher (line 461) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 472) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 477) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 480) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method forward_intermediates (line 484) | def forward_intermediates(
    method prune_intermediate_layers (line 534) | def prune_intermediate_layers(
    method forward_features (line 550) | def forward_features(self, x):
    method forward_head (line 556) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 559) | def forward(self, x):
  function _init_weights (line 565) | def _init_weights(module, name=None, head_init_scale=1.0):
  function checkpoint_filter_fn (line 578) | def checkpoint_filter_fn(state_dict, model):
  function _create_edgenext (line 609) | def _create_edgenext(variant, pretrained=False, **kwargs):
  function _cfg (line 618) | def _cfg(url='', **kwargs):
  function edgenext_xx_small (line 657) | def edgenext_xx_small(pretrained=False, **kwargs) -> EdgeNeXt:
  function edgenext_x_small (line 668) | def edgenext_x_small(pretrained=False, **kwargs) -> EdgeNeXt:
  function edgenext_small (line 679) | def edgenext_small(pretrained=False, **kwargs) -> EdgeNeXt:
  function edgenext_base (line 690) | def edgenext_base(pretrained=False, **kwargs) -> EdgeNeXt:
  function edgenext_small_rw (line 701) | def edgenext_small_rw(pretrained=False, **kwargs) -> EdgeNeXt:

FILE: timm/models/efficientformer.py
  class Attention (line 52) | class Attention(torch.nn.Module):
    method __init__ (line 55) | def __init__(
    method train (line 90) | def train(self, mode=True):
    method get_attention_biases (line 95) | def get_attention_biases(self, device: torch.device) -> torch.Tensor:
    method forward (line 104) | def forward(self, x):  # x (B,N,C)
  class Stem4 (line 119) | class Stem4(nn.Sequential):
    method __init__ (line 120) | def __init__(
  class Downsample (line 141) | class Downsample(nn.Module):
    method __init__ (line 148) | def __init__(
    method forward (line 166) | def forward(self, x):
  class Flat (line 172) | class Flat(nn.Module):
    method __init__ (line 174) | def __init__(self, ):
    method forward (line 177) | def forward(self, x):
  class Pooling (line 182) | class Pooling(nn.Module):
    method __init__ (line 188) | def __init__(self, pool_size: int = 3):
    method forward (line 192) | def forward(self, x):
  class ConvMlpWithNorm (line 196) | class ConvMlpWithNorm(nn.Module):
    method __init__ (line 202) | def __init__(
    method forward (line 224) | def forward(self, x):
  class MetaBlock1d (line 235) | class MetaBlock1d(nn.Module):
    method __init__ (line 237) | def __init__(
    method forward (line 267) | def forward(self, x):
  class MetaBlock2d (line 273) | class MetaBlock2d(nn.Module):
    method __init__ (line 275) | def __init__(
    method forward (line 305) | def forward(self, x):
  class EfficientFormerStage (line 311) | class EfficientFormerStage(nn.Module):
    method __init__ (line 313) | def __init__(
    method forward (line 378) | def forward(self, x):
  class EfficientFormer (line 387) | class EfficientFormer(nn.Module):
    method __init__ (line 389) | def __init__(
    method _init_weights (line 461) | def _init_weights(self, m):
    method no_weight_decay (line 468) | def no_weight_decay(self):
    method group_matcher (line 472) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 480) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 485) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 488) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method set_distilled_training (line 496) | def set_distilled_training(self, enable=True):
    method forward_intermediates (line 499) | def forward_intermediates(
    method prune_intermediate_layers (line 553) | def prune_intermediate_layers(
    method forward_features (line 569) | def forward_features(self, x):
    method forward_head (line 575) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 589) | def forward(self, x):
  function checkpoint_filter_fn (line 595) | def checkpoint_filter_fn(state_dict, model):
  function _cfg (line 622) | def _cfg(url='', **kwargs):
  function _create_efficientformer (line 647) | def _create_efficientformer(variant, pretrained=False, **kwargs):
  function efficientformer_l1 (line 659) | def efficientformer_l1(pretrained=False, **kwargs) -> EfficientFormer:
  function efficientformer_l3 (line 669) | def efficientformer_l3(pretrained=False, **kwargs) -> EfficientFormer:
  function efficientformer_l7 (line 679) | def efficientformer_l7(pretrained=False, **kwargs) -> EfficientFormer:

FILE: timm/models/efficientformer_v2.py
  class ConvNorm (line 69) | class ConvNorm(nn.Module):
    method __init__ (line 70) | def __init__(
    method forward (line 101) | def forward(self, x):
  class Attention2d (line 107) | class Attention2d(torch.nn.Module):
    method __init__ (line 110) | def __init__(
    method train (line 166) | def train(self, mode=True):
    method reset_parameters (line 171) | def reset_parameters(self) -> None:
    method _compute_attention_bias_idxs (line 176) | def _compute_attention_bias_idxs(self, device=None):
    method _init_buffers (line 186) | def _init_buffers(self) -> None:
    method init_non_persistent_buffers (line 193) | def init_non_persistent_buffers(self) -> None:
    method get_attention_biases (line 197) | def get_attention_biases(self, device: torch.device) -> torch.Tensor:
    method forward (line 206) | def forward(self, x):
  class LocalGlobalQuery (line 233) | class LocalGlobalQuery(torch.nn.Module):
    method __init__ (line 234) | def __init__(
    method forward (line 247) | def forward(self, x):
  class Attention2dDownsample (line 255) | class Attention2dDownsample(torch.nn.Module):
    method __init__ (line 258) | def __init__(
    method train (line 307) | def train(self, mode=True):
    method reset_parameters (line 312) | def reset_parameters(self) -> None:
    method _compute_attention_bias_idxs (line 317) | def _compute_attention_bias_idxs(self, device=None):
    method _init_buffers (line 331) | def _init_buffers(self) -> None:
    method init_non_persistent_buffers (line 338) | def init_non_persistent_buffers(self) -> None:
    method get_attention_biases (line 342) | def get_attention_biases(self, device: torch.device) -> torch.Tensor:
    method forward (line 351) | def forward(self, x):
  class Downsample (line 371) | class Downsample(nn.Module):
    method __init__ (line 372) | def __init__(
    method forward (line 414) | def forward(self, x):
  class ConvMlpWithNorm (line 421) | class ConvMlpWithNorm(nn.Module):
    method __init__ (line 427) | def __init__(
    method forward (line 469) | def forward(self, x):
  class EfficientFormerV2Block (line 478) | class EfficientFormerV2Block(nn.Module):
    method __init__ (line 479) | def __init__(
    method forward (line 526) | def forward(self, x):
  class Stem4 (line 533) | class Stem4(nn.Sequential):
    method __init__ (line 534) | def __init__(
  class EfficientFormerV2Stage (line 569) | class EfficientFormerV2Stage(nn.Module):
    method __init__ (line 571) | def __init__(
    method forward (line 632) | def forward(self, x):
  class EfficientFormerV2 (line 641) | class EfficientFormerV2(nn.Module):
    method __init__ (line 642) | def __init__(
    method _init_weights (line 727) | def _init_weights(self, m, needs_reset: bool = True):
    method init_weights (line 735) | def init_weights(self, needs_reset: bool = True):
    method no_weight_decay (line 739) | def no_weight_decay(self):
    method group_matcher (line 743) | def group_matcher(self, coarse=False):
    method set_grad_checkpointing (line 751) | def set_grad_checkpointing(self, enable=True):
    method get_classifier (line 756) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 759) | def reset_classifier(self, num_classes: int, global_pool: Optional[str...
    method set_distilled_training (line 767) | def set_distilled_training(self, enable=True):
    method forward_intermediates (line 770) | def forward_intermediates(
    method prune_intermediate_layers (line 821) | def prune_intermediate_layers(
    method forward_features (line 837) | def forward_features(self, x):
    method forward_head (line 843) | def forward_head(self, x, pre_logits: bool = False):
    method forward (line 857) | def forward(self, x):
  function _cfg (line 863) | def _cfg(url='', **kwargs):
  function _create_efficientformerv2 (line 891) | def _create_efficientformerv2(variant, pretrained=False, **kwargs):
  function efficientformerv2_s0 (line 901) | def efficientformerv2_s0(pretrained=False, **kwargs) -> EfficientFormerV2:
  function efficientformerv2_s1 (line 913) | def efficientformerv2_s1(pretrained=False, **kwargs) -> EfficientFormerV2:
  function efficientformerv2_s2 (line 925) | def efficientformerv2_s2(pretrained=False, **kwargs) -> EfficientFormerV2:
  function efficientformerv2_l (line 937) | def efficientformerv2_l(pretrained=False, **kwargs) -> EfficientFormerV2:

FILE: timm/models/efficientnet.py
  class EfficientNet (line 59) | class EfficientNet(nn.Module):
    method __init__ (line 81) | def __init__(
    method as_sequential (line 177) | def as_sequential(self) -> nn.Sequential:
    method group_matcher (line 186) | def group_matcher(self, coarse: bool = False) -> Dict[str, Union[str, ...
    method set_grad_checkpointing (line 204) | def set_grad_checkpointing(self, enable: bool = True) -> None:
    method get_classifier (line 213) | def get_classifier(self) -> nn.Module:
    method reset_classifier (line 217) | def reset_classifier(self, num_classes: int, global_pool: str = 'avg')...
    method forward_intermediates (line 228) | def forward_intermediates(
    method prune_intermediate_layers (line 288) | def prune_intermediate_layers(
    method forward_features (line 319) | def forward_features(self, x: torch.Tensor) -> torch.Tensor:
    method forward_head (line 331) | def forward_head(self, x: torch.Tensor, pre_logits: bool = False) -> t...
    method forward (line 346) | def forward(self, x: torch.Tensor) -> torch.Tensor:
  class EfficientNetFeatures (line 353) | class EfficientNetFeatures(nn.Module):
    method __init__ (line 360) | def __init__(
    method set_grad_checkpointing (line 423) | def set_grad_checkpointing(self, enable: bool = True) -> None:
    method forward (line 431) | def forward(self, x) -> List[torch.Tensor]:
  function _create_effnet (line 452) | def _create_effnet(variant, pretrained=False, **kwargs):
  function _gen_mnasnet_a1 (line 479) | def _gen_mnasnet_a1(variant, channel_multiplier=1.0, pretrained=False, *...
  function _gen_mnasnet_b1 (line 515) | def _gen_mnasnet_b1(variant, channel_multiplier=1.0, pretrained=False, *...
  function _gen_mnasnet_small (line 551) | def _gen_mnasnet_small(variant, channel_multiplier=1.0, pretrained=False...
  function _gen_mobilenet_v1 (line 580) | def _gen_mobilenet_v1(
  function _gen_mobilenet_v2 (line 616) | def _gen_mobilenet_v2(
  function _gen_fbnetc (line 653) | def _gen_fbnetc(variant, channel_multiplier=1.0, pretrained=False, **kwa...
  function _gen_spnasnet (line 683) | def _gen_spnasnet(variant, channel_multiplier=1.0, pretrained=False, **k...
  function _gen_efficientnet (line 718) | def _gen_efficientnet(
  function _gen_efficientnet_edge (line 768) | def _gen_efficientnet_edge(
  function _gen_efficientnet_condconv (line 800) | def _gen_efficientnet_condconv(
  function _gen_efficientnet_lite (line 832) | def _gen_efficientnet_lite(variant, channel_multiplier=1.0, depth_multip...
  function _gen_efficientnetv2_base (line 873) | def _gen_efficientnetv2_base(
  function _gen_efficientnetv2_s (line 903) | def _gen_efficientnetv2_s(
  function _gen_efficientnetv2_m (line 943) | def _gen_efficientnetv2_m(
  function _gen_efficientnetv2_l (line 975) | def _gen_efficientnetv2_l(
  function _gen_efficientnetv2_xl (line 1007) | def _gen_efficientnetv2_xl(
  function _gen_efficientnet_x (line 1039) | def _gen_efficientnet_x(
  function _gen_mixnet_s (line 1122) | def _gen_mixnet_s(variant, channel_multiplier=1.0, pretrained=False, **k...
  function _gen_mixnet_m (line 1155) | def _gen_mixnet_m(variant, channel_multiplier=1.0, depth_multiplier=1.0,...
  function _gen_tinynet (line 1188) | def _gen_tinynet(variant, model_width=1.0, depth_multiplier=1.0, pretrai...
  function _gen_mobilenet_edgetpu (line 1211) | def _gen_mobilenet_edgetpu(variant, channel_multiplier=1.0, depth_multip...
  function _gen_test_efficientnet (line 1300) | def _gen_test_efficientnet(variant, channel_multiplier=1.0, depth_multip...
  function _cfg (line 1324) | def _cfg(url='', **kwargs):
  function mnasnet_050 (line 1923) | def mnasnet_050(pretrained=False, **kwargs) -> EfficientNet:
  function mnasnet_075 (line 1930) | def mnasnet_075(pretrained=False, **kwargs) -> EfficientNet:
  function mnasnet_100 (line 1937) | def mnasnet_100(pretrained=False, **kwargs) -> EfficientNet:
  function mnasnet_140 (line 1944) | def mnasnet_140(pretrained=False, **kwargs) -> EfficientNet:
  function semnasnet_050 (line 1951) | def semnasnet_050(pretrained=False, **kwargs) -> EfficientNet:
  function semnasnet_075 (line 1958) | def semnasnet_075(pretrained=False, **kwargs) -> EfficientNet:
  function semnasnet_100 (line 1965) | def semnasnet_100(pretrained=False, **kwargs) -> EfficientNet:
  function semnasnet_140 (line 1972) | def semnasnet_140(pretrained=False, **kwargs) -> EfficientNet:
  function mnasnet_small (line 1979) | def mnasnet_small(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv1_100 (line 1986) | def mobilenetv1_100(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv1_100h (line 1993) | def mobilenetv1_100h(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv1_125 (line 2000) | def mobilenetv1_125(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv2_035 (line 2007) | def mobilenetv2_035(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv2_050 (line 2014) | def mobilenetv2_050(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv2_075 (line 2021) | def mobilenetv2_075(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv2_100 (line 2028) | def mobilenetv2_100(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv2_140 (line 2035) | def mobilenetv2_140(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv2_110d (line 2042) | def mobilenetv2_110d(pretrained=False, **kwargs) -> EfficientNet:
  function mobilenetv2_120d (line 2050) | def mobilenetv2_120d(pretrained=False, **kwargs) -> EfficientNet:
  function fbnetc_100 (line 2058) | def fbnetc_100(pretrained=False, **kwargs) -> EfficientNet:
  function spnasnet_100 (line 2068) | def spnasnet_100(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b0 (line 2075) | def efficientnet_b0(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b1 (line 2084) | def efficientnet_b1(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b2 (line 2093) | def efficientnet_b2(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b3 (line 2102) | def efficientnet_b3(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b4 (line 2111) | def efficientnet_b4(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b5 (line 2120) | def efficientnet_b5(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b6 (line 2129) | def efficientnet_b6(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b7 (line 2138) | def efficientnet_b7(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b8 (line 2147) | def efficientnet_b8(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_l2 (line 2156) | def efficientnet_l2(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b0_gn (line 2166) | def efficientnet_b0_gn(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b0_g8_gn (line 2174) | def efficientnet_b0_g8_gn(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b0_g16_evos (line 2183) | def efficientnet_b0_g16_evos(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b3_gn (line 2192) | def efficientnet_b3_gn(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b3_g8_gn (line 2202) | def efficientnet_b3_g8_gn(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_blur_b0 (line 2212) | def efficientnet_blur_b0(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_es (line 2223) | def efficientnet_es(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_es_pruned (line 2231) | def efficientnet_es_pruned(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_em (line 2238) | def efficientnet_em(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_el (line 2246) | def efficientnet_el(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_el_pruned (line 2253) | def efficientnet_el_pruned(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_cc_b0_4e (line 2260) | def efficientnet_cc_b0_4e(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_cc_b0_8e (line 2269) | def efficientnet_cc_b0_8e(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_cc_b1_8e (line 2279) | def efficientnet_cc_b1_8e(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_lite0 (line 2289) | def efficientnet_lite0(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_lite1 (line 2298) | def efficientnet_lite1(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_lite2 (line 2307) | def efficientnet_lite2(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_lite3 (line 2316) | def efficientnet_lite3(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_lite4 (line 2325) | def efficientnet_lite4(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b1_pruned (line 2334) | def efficientnet_b1_pruned(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b2_pruned (line 2345) | def efficientnet_b2_pruned(pretrained=False, **kwargs) -> EfficientNet:
  function efficientnet_b3_pruned (line 2356) | def efficientnet_b3_pruned(pretrained=False
Copy disabled (too large) Download .json
Condensed preview — 463 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (13,464K chars).
[
  {
    "path": ".gitattributes",
    "chars": 31,
    "preview": "*.ipynb linguist-documentation\n"
  },
  {
    "path": ".github/FUNDING.yml",
    "chars": 64,
    "preview": "# These are supported funding model platforms\ngithub: rwightman\n"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/bug_report.md",
    "chars": 905,
    "preview": "---\nname: Bug report\nabout: Create a bug report to help us improve. Issues are for reporting bugs or requesting\n  featur"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/config.yml",
    "chars": 272,
    "preview": "blank_issues_enabled: false\ncontact_links:\n  - name: Community Discussions\n    url: https://github.com/rwightman/pytorch"
  },
  {
    "path": ".github/ISSUE_TEMPLATE/feature_request.md",
    "chars": 744,
    "preview": "---\nname: Feature request\nabout: Suggest an idea for this project. Hparam requests, training help are not feature reques"
  },
  {
    "path": ".github/workflows/build_documentation.yml",
    "chars": 462,
    "preview": "name: Build documentation\n\non:\n  push:\n    branches:\n      - main\n      - doc-builder*\n      - v*-release\n\njobs:\n   buil"
  },
  {
    "path": ".github/workflows/build_pr_documentation.yml",
    "chars": 522,
    "preview": "name: Build PR Documentation\n\non:\n  pull_request:\n\nconcurrency:\n  group: ${{ github.workflow }}-${{ github.head_ref || g"
  },
  {
    "path": ".github/workflows/tests.yml",
    "chars": 2436,
    "preview": "name: Python tests\n\non:\n  push:\n    branches: [ main ]\n  pull_request:\n    branches: [ main ]\n\nenv:\n  OMP_NUM_THREADS: 2"
  },
  {
    "path": ".github/workflows/trufflehog.yml",
    "chars": 256,
    "preview": "on:\n  push:\n\nname: Secret Leaks\n\njobs:\n  trufflehog:\n    runs-on: ubuntu-latest\n    steps:\n    - name: Checkout code\n   "
  },
  {
    "path": ".github/workflows/upload_pr_documentation.yml",
    "chars": 377,
    "preview": "name: Upload PR Documentation\n\non:\n  workflow_run:\n    workflows: [\"Build PR Documentation\"]\n    types:\n      - complete"
  },
  {
    "path": ".gitignore",
    "chars": 1340,
    "preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
  },
  {
    "path": "CITATION.cff",
    "chars": 307,
    "preview": "message: \"If you use this software, please cite it as below.\"\ntitle: \"PyTorch Image Models\"\nversion: \"1.2.2\"\ndoi: \"10.52"
  },
  {
    "path": "CLAUDE.md",
    "chars": 901,
    "preview": "# CLAUDE.md - PyTorch Image Models (timm)\n\n## Build/Test Commands\n- Install: `python -m pip install -e .`\n- Run tests: `"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "chars": 5458,
    "preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to participate in"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 4704,
    "preview": "*This guideline is very much a work-in-progress.*\n\nContributions to `timm` for code, documentation, tests are more than "
  },
  {
    "path": "LICENSE",
    "chars": 11343,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "MANIFEST.in",
    "chars": 95,
    "preview": "include timm/models/_pruned/*.txt\ninclude timm/data/_info/*.txt\ninclude timm/data/_info/*.json\n"
  },
  {
    "path": "README.md",
    "chars": 37755,
    "preview": "# PyTorch Image Models\n- [What's New](#whats-new)\n- [Introduction](#introduction)\n- [Models](#models)\n- [Features](#feat"
  },
  {
    "path": "UPGRADING.md",
    "chars": 2524,
    "preview": "# Upgrading from previous versions\n\nI generally try to maintain code interface and especially model weight compatibility"
  },
  {
    "path": "avg_checkpoints.py",
    "chars": 5995,
    "preview": "#!/usr/bin/env python3\n\"\"\" Checkpoint Averaging Script\n\nThis script averages all model weights for checkpoints in specif"
  },
  {
    "path": "benchmark.py",
    "chars": 28381,
    "preview": "#!/usr/bin/env python3\n\"\"\" Model Benchmark Script\n\nAn inference and train step benchmark script for timm models.\n\nHacked"
  },
  {
    "path": "bulk_runner.py",
    "chars": 8608,
    "preview": "#!/usr/bin/env python3\n\"\"\" Bulk Model Script Runner\n\nRun validation or benchmark script in separate process for each mod"
  },
  {
    "path": "clean_checkpoint.py",
    "chars": 4221,
    "preview": "#!/usr/bin/env python3\n\"\"\" Checkpoint Cleaning Script\n\nTakes training checkpoints with GPU tensors, optimizer state, ext"
  },
  {
    "path": "convert/convert_from_mxnet.py",
    "chars": 4033,
    "preview": "import argparse\nimport hashlib\nimport os\n\nimport mxnet as mx\nimport gluoncv\nimport torch\nfrom timm import create_model\n\n"
  },
  {
    "path": "convert/convert_nest_flax.py",
    "chars": 5582,
    "preview": "\"\"\"\nConvert weights from https://github.com/google-research/nested-transformer\nNOTE: You'll need https://github.com/goog"
  },
  {
    "path": "distributed_train.sh",
    "chars": 81,
    "preview": "#!/bin/bash\nNUM_PROC=$1\nshift\ntorchrun --nproc_per_node=$NUM_PROC train.py \"$@\"\n\n"
  },
  {
    "path": "hfdocs/README.md",
    "chars": 245,
    "preview": "# Hugging Face Timm Docs\n\n## Getting Started\n\n```\npip install git+https://github.com/huggingface/doc-builder.git@main#eg"
  },
  {
    "path": "hfdocs/source/_toctree.yml",
    "chars": 4425,
    "preview": "- sections: \n  - local: index\n    title: Home\n  - local: quickstart\n    title: Quickstart\n  - local: installation\n    ti"
  },
  {
    "path": "hfdocs/source/changes.mdx",
    "chars": 123847,
    "preview": "# Changelog\n\n## Dec 12, 2025\n* Add CSATV2 model (thanks https://github.com/gusdlf93) -- a lightweight but high res model"
  },
  {
    "path": "hfdocs/source/feature_extraction.mdx",
    "chars": 10948,
    "preview": "# Feature Extraction\n\nAll of the models in `timm` have consistent mechanisms for obtaining various types of features fro"
  },
  {
    "path": "hfdocs/source/hf_hub.mdx",
    "chars": 1858,
    "preview": "# Sharing and Loading Models From the Hugging Face Hub\n\nThe `timm` library has a built-in integration with the Hugging F"
  },
  {
    "path": "hfdocs/source/hparams.mdx",
    "chars": 3560,
    "preview": "# HParams\nOver the years, many `timm` models have been trained with various hyper-parameters as the libraries and models"
  },
  {
    "path": "hfdocs/source/index.mdx",
    "chars": 1579,
    "preview": "# timm\n\n<img class=\"float-left !m-0 !border-0 !dark:border-0 !shadow-none !max-w-lg w-[150px]\" src=\"https://huggingface."
  },
  {
    "path": "hfdocs/source/installation.mdx",
    "chars": 2057,
    "preview": "# Installation\n\nBefore you start, you'll need to setup your environment and install the appropriate packages. `timm` is "
  },
  {
    "path": "hfdocs/source/models/adversarial-inception-v3.mdx",
    "chars": 5893,
    "preview": "# Adversarial Inception v3\n\n**Inception v3** is a convolutional neural network architecture from the Inception family th"
  },
  {
    "path": "hfdocs/source/models/advprop.mdx",
    "chars": 14770,
    "preview": "# AdvProp (EfficientNet)\n\n**AdvProp** is an adversarial training scheme which treats adversarial examples as additional "
  },
  {
    "path": "hfdocs/source/models/big-transfer.mdx",
    "chars": 10307,
    "preview": "# Big Transfer (BiT)\n\n**Big Transfer (BiT)** is a type of pretraining recipe that pre-trains  on a large supervised sour"
  },
  {
    "path": "hfdocs/source/models/csp-darknet.mdx",
    "chars": 4936,
    "preview": "# CSP-DarkNet\n\n**CSPDarknet53** is a convolutional neural network and backbone for object detection that uses [DarkNet-5"
  },
  {
    "path": "hfdocs/source/models/csp-resnet.mdx",
    "chars": 4795,
    "preview": "# CSP-ResNet\n\n**CSPResNet** is a convolutional neural network where we apply the Cross Stage Partial Network (CSPNet) ap"
  },
  {
    "path": "hfdocs/source/models/csp-resnext.mdx",
    "chars": 4835,
    "preview": "# CSP-ResNeXt\n\n**CSPResNeXt** is a convolutional neural network where we apply the Cross Stage Partial Network (CSPNet) "
  },
  {
    "path": "hfdocs/source/models/densenet.mdx",
    "chars": 10553,
    "preview": "# DenseNet\n\n**DenseNet** is a type of convolutional neural network that utilises dense connections between layers, throu"
  },
  {
    "path": "hfdocs/source/models/dla.mdx",
    "chars": 16522,
    "preview": "# Deep Layer Aggregation\n\nExtending  “shallow” skip connections, **Dense Layer Aggregation (DLA)** incorporates more dep"
  },
  {
    "path": "hfdocs/source/models/dpn.mdx",
    "chars": 9525,
    "preview": "# Dual Path Network (DPN)\n\nA **Dual Path Network (DPN)** is a convolutional neural network which presents a new topology"
  },
  {
    "path": "hfdocs/source/models/ecaresnet.mdx",
    "chars": 9349,
    "preview": "# ECA-ResNet\n\nAn **ECA ResNet** is a variant on a [ResNet](https://paperswithcode.com/method/resnet) that utilises an [E"
  },
  {
    "path": "hfdocs/source/models/efficientnet-pruned.mdx",
    "chars": 7756,
    "preview": "# EfficientNet (Knapsack Pruned)\n\n**EfficientNet** is a convolutional neural network architecture and scaling method tha"
  },
  {
    "path": "hfdocs/source/models/efficientnet.mdx",
    "chars": 12864,
    "preview": "# EfficientNet\n\n**EfficientNet** is a convolutional neural network architecture and scaling method that uniformly scales"
  },
  {
    "path": "hfdocs/source/models/ensemble-adversarial.mdx",
    "chars": 5695,
    "preview": "# # Ensemble Adversarial Inception ResNet v2\n\n**Inception-ResNet-v2** is a convolutional neural architecture that builds"
  },
  {
    "path": "hfdocs/source/models/ese-vovnet.mdx",
    "chars": 5270,
    "preview": "# ESE-VoVNet\n\n**VoVNet** is a convolutional neural network that seeks to make [DenseNet](https://paperswithcode.com/meth"
  },
  {
    "path": "hfdocs/source/models/fbnet.mdx",
    "chars": 4867,
    "preview": "# FBNet\n\n**FBNet** is a type of convolutional neural architectures discovered through [DNAS](https://paperswithcode.com/"
  },
  {
    "path": "hfdocs/source/models/gloun-inception-v3.mdx",
    "chars": 5259,
    "preview": "# (Gluon) Inception v3\n\n**Inception v3** is a convolutional neural network architecture from the Inception family that m"
  },
  {
    "path": "hfdocs/source/models/gloun-resnet.mdx",
    "chars": 17315,
    "preview": "# (Gluon) ResNet\n\n**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, in"
  },
  {
    "path": "hfdocs/source/models/gloun-resnext.mdx",
    "chars": 6949,
    "preview": "# (Gluon) ResNeXt\n\nA **ResNeXt** repeats a [building block](https://paperswithcode.com/method/resnext-block) that aggreg"
  },
  {
    "path": "hfdocs/source/models/gloun-senet.mdx",
    "chars": 4362,
    "preview": "# (Gluon) SENet\n\nA **SENet** is a convolutional neural network architecture that employs [squeeze-and-excitation blocks]"
  },
  {
    "path": "hfdocs/source/models/gloun-seresnext.mdx",
    "chars": 6556,
    "preview": "# (Gluon) SE-ResNeXt\n\n**SE ResNeXt** is a variant of a [ResNext](https://www.paperswithcode.com/method/resnext) that emp"
  },
  {
    "path": "hfdocs/source/models/gloun-xception.mdx",
    "chars": 4401,
    "preview": "# (Gluon) Xception\n\n**Xception** is a convolutional neural network architecture that relies solely on [depthwise separab"
  },
  {
    "path": "hfdocs/source/models/hrnet.mdx",
    "chars": 12921,
    "preview": "# HRNet\n\n**HRNet**, or **High-Resolution Net**, is a general purpose convolutional neural network for tasks like semanti"
  },
  {
    "path": "hfdocs/source/models/ig-resnext.mdx",
    "chars": 8429,
    "preview": "# Instagram ResNeXt WSL\n\nA **ResNeXt** repeats a [building block](https://paperswithcode.com/method/resnext-block) that "
  },
  {
    "path": "hfdocs/source/models/inception-resnet-v2.mdx",
    "chars": 4702,
    "preview": "# Inception ResNet v2\n\n**Inception-ResNet-v2** is a convolutional neural architecture that builds on the Inception famil"
  },
  {
    "path": "hfdocs/source/models/inception-v3.mdx",
    "chars": 5261,
    "preview": "# Inception v3\n\n**Inception v3** is a convolutional neural network architecture from the Inception family that makes sev"
  },
  {
    "path": "hfdocs/source/models/inception-v4.mdx",
    "chars": 4551,
    "preview": "# Inception v4\n\n**Inception-v4** is a convolutional neural network architecture that builds on previous iterations of th"
  },
  {
    "path": "hfdocs/source/models/legacy-se-resnet.mdx",
    "chars": 9467,
    "preview": "# (Legacy) SE-ResNet\n\n**SE ResNet** is a variant of a [ResNet](https://www.paperswithcode.com/method/resnet) that employ"
  },
  {
    "path": "hfdocs/source/models/legacy-se-resnext.mdx",
    "chars": 7058,
    "preview": "# (Legacy) SE-ResNeXt\n\n**SE ResNeXt** is a variant of a [ResNeXt](https://www.paperswithcode.com/method/resnext) that em"
  },
  {
    "path": "hfdocs/source/models/legacy-senet.mdx",
    "chars": 4503,
    "preview": "# (Legacy) SENet\n\nA **SENet** is a convolutional neural network architecture that employs [squeeze-and-excitation blocks"
  },
  {
    "path": "hfdocs/source/models/mixnet.mdx",
    "chars": 6989,
    "preview": "# MixNet\n\n**MixNet** is a type of convolutional neural network discovered via AutoML that utilises [MixConvs](https://pa"
  },
  {
    "path": "hfdocs/source/models/mnasnet.mdx",
    "chars": 5818,
    "preview": "# MnasNet\n\n**MnasNet** is a type of convolutional neural network optimized for mobile devices that is discovered through"
  },
  {
    "path": "hfdocs/source/models/mobilenet-v2.mdx",
    "chars": 8720,
    "preview": "# MobileNet v2\n\n**MobileNetV2** is a convolutional neural network architecture that seeks to perform well on mobile devi"
  },
  {
    "path": "hfdocs/source/models/mobilenet-v3.mdx",
    "chars": 6548,
    "preview": "# MobileNet v3\n\n**MobileNetV3** is a convolutional neural network that is designed for mobile phone CPUs. The network de"
  },
  {
    "path": "hfdocs/source/models/nasnet.mdx",
    "chars": 4372,
    "preview": "# NASNet\n\n**NASNet** is a type of convolutional neural network discovered through neural architecture search. The buildi"
  },
  {
    "path": "hfdocs/source/models/noisy-student.mdx",
    "chars": 16650,
    "preview": "# Noisy Student (EfficientNet)\n\n**Noisy Student Training** is a semi-supervised learning approach. It extends the idea o"
  },
  {
    "path": "hfdocs/source/models/pnasnet.mdx",
    "chars": 4663,
    "preview": "# PNASNet\n\n**Progressive Neural Architecture Search**, or **PNAS**, is a method for learning the structure of convolutio"
  },
  {
    "path": "hfdocs/source/models/regnetx.mdx",
    "chars": 16283,
    "preview": "# RegNetX\n\n**RegNetX** is a convolutional network design space with simple, regular models with parameters: depth \\\\( d "
  },
  {
    "path": "hfdocs/source/models/regnety.mdx",
    "chars": 16871,
    "preview": "# RegNetY\n\n**RegNetY** is a convolutional network design space with simple, regular models with parameters: depth \\\\( d "
  },
  {
    "path": "hfdocs/source/models/res2net.mdx",
    "chars": 10042,
    "preview": "# Res2Net\n\n**Res2Net** is an image model that employs a variation on bottleneck residual blocks, [Res2Net Blocks](https:"
  },
  {
    "path": "hfdocs/source/models/res2next.mdx",
    "chars": 4921,
    "preview": "# Res2NeXt\n\n**Res2NeXt** is an image model that employs a variation on [ResNeXt](https://paperswithcode.com/method/resne"
  },
  {
    "path": "hfdocs/source/models/resnest.mdx",
    "chars": 13565,
    "preview": "# ResNeSt\n\nA **ResNeSt** is a variant on a [ResNet](https://paperswithcode.com/method/resnet), which instead stacks [Spl"
  },
  {
    "path": "hfdocs/source/models/resnet-d.mdx",
    "chars": 10049,
    "preview": "# ResNet-D\n\n**ResNet-D** is a modification on the [ResNet](https://paperswithcode.com/method/resnet) architecture that u"
  },
  {
    "path": "hfdocs/source/models/resnet.mdx",
    "chars": 12654,
    "preview": "# ResNet\n\n**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instead of"
  },
  {
    "path": "hfdocs/source/models/resnext.mdx",
    "chars": 7746,
    "preview": "# ResNeXt\n\nA **ResNeXt** repeats a [building block](https://paperswithcode.com/method/resnext-block) that aggregates a s"
  },
  {
    "path": "hfdocs/source/models/rexnet.mdx",
    "chars": 7951,
    "preview": "# RexNet\n\n**Rank Expansion Networks** (ReXNets) follow a set of new design principles for designing bottlenecks in image"
  },
  {
    "path": "hfdocs/source/models/se-resnet.mdx",
    "chars": 5798,
    "preview": "# SE-ResNet\n\n**SE ResNet** is a variant of a [ResNet](https://www.paperswithcode.com/method/resnet) that employs [squeez"
  },
  {
    "path": "hfdocs/source/models/selecsls.mdx",
    "chars": 6360,
    "preview": "# SelecSLS\n\n**SelecSLS** uses novel selective long and short range skip connections to improve the information flow allo"
  },
  {
    "path": "hfdocs/source/models/seresnext.mdx",
    "chars": 7035,
    "preview": "# SE-ResNeXt\n\n**SE ResNeXt** is a variant of a [ResNext](https://www.paperswithcode.com/method/resneXt) that employs [sq"
  },
  {
    "path": "hfdocs/source/models/skresnet.mdx",
    "chars": 5670,
    "preview": "# SK-ResNet\n\n**SK ResNet** is a variant of a [ResNet](https://www.paperswithcode.com/method/resnet) that employs a [Sele"
  },
  {
    "path": "hfdocs/source/models/skresnext.mdx",
    "chars": 4620,
    "preview": "# SK-ResNeXt\n\n**SK ResNeXt** is a variant of a [ResNeXt](https://www.paperswithcode.com/method/resnext) that employs a ["
  },
  {
    "path": "hfdocs/source/models/spnasnet.mdx",
    "chars": 4266,
    "preview": "# SPNASNet\n\n**Single-Path NAS** is a novel differentiable NAS method for designing hardware-efficient ConvNets in less t"
  },
  {
    "path": "hfdocs/source/models/ssl-resnet.mdx",
    "chars": 6582,
    "preview": "# SSL ResNet\n\n**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, instea"
  },
  {
    "path": "hfdocs/source/models/swsl-resnet.mdx",
    "chars": 6624,
    "preview": "# SWSL ResNet\n\n**Residual Networks**, or **ResNets**, learn residual functions with reference to the layer inputs, inste"
  },
  {
    "path": "hfdocs/source/models/swsl-resnext.mdx",
    "chars": 8930,
    "preview": "# SWSL ResNeXt\n\nA **ResNeXt** repeats a [building block](https://paperswithcode.com/method/resnext-block) that aggregate"
  },
  {
    "path": "hfdocs/source/models/tf-efficientnet-condconv.mdx",
    "chars": 8947,
    "preview": "# (Tensorflow) EfficientNet CondConv\n\n**EfficientNet** is a convolutional neural network architecture and scaling method"
  },
  {
    "path": "hfdocs/source/models/tf-efficientnet-lite.mdx",
    "chars": 9387,
    "preview": "# (Tensorflow) EfficientNet Lite\n\n**EfficientNet** is a convolutional neural network architecture and scaling method tha"
  },
  {
    "path": "hfdocs/source/models/tf-efficientnet.mdx",
    "chars": 19972,
    "preview": "# (Tensorflow) EfficientNet\n\n**EfficientNet** is a convolutional neural network architecture and scaling method that uni"
  },
  {
    "path": "hfdocs/source/models/tf-inception-v3.mdx",
    "chars": 5440,
    "preview": "# (Tensorflow) Inception v3\n\n**Inception v3** is a convolutional neural network architecture from the Inception family t"
  },
  {
    "path": "hfdocs/source/models/tf-mixnet.mdx",
    "chars": 6237,
    "preview": "# (Tensorflow) MixNet\n\n**MixNet** is a type of convolutional neural network discovered via AutoML that utilises [MixConv"
  },
  {
    "path": "hfdocs/source/models/tf-mobilenet-v3.mdx",
    "chars": 11719,
    "preview": "# (Tensorflow) MobileNet v3\n\n**MobileNetV3** is a convolutional neural network that is designed for mobile phone CPUs. T"
  },
  {
    "path": "hfdocs/source/models/tresnet.mdx",
    "chars": 10555,
    "preview": "# TResNet\n\nA **TResNet** is a variant on a [ResNet](https://paperswithcode.com/method/resnet) that aim to boost accuracy"
  },
  {
    "path": "hfdocs/source/models/wide-resnet.mdx",
    "chars": 5440,
    "preview": "# Wide ResNet\n\n**Wide Residual Networks** are a variant on [ResNets](https://paperswithcode.com/method/resnet) where we "
  },
  {
    "path": "hfdocs/source/models/xception.mdx",
    "chars": 7087,
    "preview": "# Xception\n\n**Xception** is a convolutional neural network architecture that relies solely on [depthwise separable convo"
  },
  {
    "path": "hfdocs/source/models.mdx",
    "chars": 12551,
    "preview": "# Model Summaries\n\nThe model architectures included come from a wide variety of sources. Sources, including papers, orig"
  },
  {
    "path": "hfdocs/source/quickstart.mdx",
    "chars": 7431,
    "preview": "# Quickstart\n\nThis quickstart is intended for developers who are ready to dive into the code and see an example of how t"
  },
  {
    "path": "hfdocs/source/reference/data.mdx",
    "chars": 164,
    "preview": "# Data\n\n[[autodoc]] timm.data.create_dataset\n\n[[autodoc]] timm.data.create_loader\n\n[[autodoc]] timm.data.create_transfor"
  },
  {
    "path": "hfdocs/source/reference/models.mdx",
    "chars": 70,
    "preview": "# Models\n\n[[autodoc]] timm.create_model\n\n[[autodoc]] timm.list_models\n"
  },
  {
    "path": "hfdocs/source/reference/optimizers.mdx",
    "chars": 1035,
    "preview": "# Optimization\n\nThis page contains the API reference documentation for learning rate optimizers included in `timm`.\n\n## "
  },
  {
    "path": "hfdocs/source/reference/schedulers.mdx",
    "chars": 644,
    "preview": "# Learning Rate Schedulers\n\nThis page contains the API reference documentation for learning rate schedulers included in "
  },
  {
    "path": "hfdocs/source/results.mdx",
    "chars": 4853,
    "preview": "# Results\n\nCSV files containing an ImageNet-1K and out-of-distribution (OOD) test set validation results for all models "
  },
  {
    "path": "hfdocs/source/training_script.mdx",
    "chars": 7039,
    "preview": "# Scripts\n\nA train, validation, inference, and checkpoint cleaning script included in the github root folder. Scripts ar"
  },
  {
    "path": "hubconf.py",
    "chars": 96,
    "preview": "dependencies = ['torch']\nimport timm\nglobals().update(timm.models._registry._model_entrypoints)\n"
  },
  {
    "path": "inference.py",
    "chars": 16658,
    "preview": "#!/usr/bin/env python3\n\"\"\"PyTorch Inference Script\n\nAn example inference script that outputs top-k class ids for images "
  },
  {
    "path": "onnx_export.py",
    "chars": 5151,
    "preview": "\"\"\" ONNX export script\n\nExport PyTorch models as ONNX graphs.\n\nThis export script originally started as an adaptation of"
  },
  {
    "path": "onnx_validate.py",
    "chars": 4544,
    "preview": "\"\"\" ONNX-runtime validation script\n\nThis script was created to verify accuracy and performance of exported ONNX\nmodels r"
  },
  {
    "path": "pyproject.toml",
    "chars": 2110,
    "preview": "[build-system]\nrequires = [\"pdm-backend\"]\nbuild-backend = \"pdm.backend\"\n\n[project]\nname = \"timm\"\nauthors = [\n    {name ="
  },
  {
    "path": "requirements-dev.txt",
    "chars": 60,
    "preview": "pytest\npytest-timeout\npytest-xdist\npytest-forked\nexpecttest\n"
  },
  {
    "path": "requirements.txt",
    "chars": 77,
    "preview": "torch>=1.7\ntorchvision\npyyaml\nhuggingface_hub>=0.17.0\nsafetensors>=0.2\nnumpy\n"
  },
  {
    "path": "results/README.md",
    "chars": 4194,
    "preview": "# Validation and Benchmark Results\n\nThis folder contains validation and benchmark results for the models in this collect"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt113-cu117-rtx3090.csv",
    "chars": 55805,
    "preview": "model,infer_samples_per_sec,infer_step_time,infer_batch_size,infer_img_size,infer_gmacs,infer_macts,param_count\r\ntinynet"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt210-cu121-rtx3090.csv",
    "chars": 77499,
    "preview": "model,infer_img_size,infer_batch_size,infer_samples_per_sec,infer_step_time,infer_gmacs,infer_macts,param_count\ntinynet_"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt240-cu124-rtx3090.csv",
    "chars": 84324,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\ntest_vit"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt240-cu124-rtx4090-dynamo.csv",
    "chars": 82461,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\ntest_eff"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt240-cu124-rtx4090.csv",
    "chars": 84400,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\ntest_vit"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt291-cu128-4090-dynamo.csv",
    "chars": 97873,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\r\ntest_vi"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt291-cu128-4090.csv",
    "chars": 97821,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\r\ntest_vi"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt291-cu130-5090-dynamo.csv",
    "chars": 98025,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\r\ntest_vi"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt291-cu130-5090.csv",
    "chars": 97996,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\r\ntest_vi"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt291-cu130-pro6000maxq-dynamo.csv",
    "chars": 98619,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\r\ntest_vi"
  },
  {
    "path": "results/benchmark-infer-amp-nchw-pt291-cu130-pro6000maxq.csv",
    "chars": 98857,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\r\ntest_vi"
  },
  {
    "path": "results/benchmark-infer-amp-nhwc-pt113-cu117-rtx3090.csv",
    "chars": 54718,
    "preview": "model,infer_samples_per_sec,infer_step_time,infer_batch_size,infer_img_size,infer_gmacs,infer_macts,param_count\ntinynet_"
  },
  {
    "path": "results/benchmark-infer-amp-nhwc-pt210-cu121-rtx3090.csv",
    "chars": 72346,
    "preview": "model,infer_img_size,infer_batch_size,infer_samples_per_sec,infer_step_time,infer_gmacs,infer_macts,param_count\ntinynet_"
  },
  {
    "path": "results/benchmark-infer-amp-nhwc-pt240-cu124-rtx3090.csv",
    "chars": 83629,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\ntest_eff"
  },
  {
    "path": "results/benchmark-infer-amp-nhwc-pt240-cu124-rtx4090.csv",
    "chars": 82822,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\ntest_vit"
  },
  {
    "path": "results/benchmark-infer-amp_bf16-nchw-pt291-cu130-pro6000maxq-dynamo.csv",
    "chars": 98629,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\r\ntest_vi"
  },
  {
    "path": "results/benchmark-infer-bf16-nchw-pt291-cu130-pro6000maxq-dynamo.csv",
    "chars": 98150,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\r\ntest_vi"
  },
  {
    "path": "results/benchmark-infer-fp32-nchw-pt221-cpu-i9_10940x-dynamo.csv",
    "chars": 55575,
    "preview": "model,infer_samples_per_sec,infer_step_time,infer_batch_size,infer_img_size,param_count\r\ntf_mobilenetv3_small_minimal_10"
  },
  {
    "path": "results/benchmark-infer-fp32-nchw-pt240-cpu-i7_12700h-dynamo.csv",
    "chars": 76170,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\ntest_byo"
  },
  {
    "path": "results/benchmark-infer-fp32-nchw-pt240-cpu-i9_10940x-dynamo.csv",
    "chars": 75669,
    "preview": "model,infer_img_size,infer_samples_per_sec,infer_step_time,infer_batch_size,param_count,infer_gmacs,infer_macts\ntest_vit"
  },
  {
    "path": "results/benchmark-train-amp-nchw-pt112-cu113-rtx3090.csv",
    "chars": 38370,
    "preview": "model,train_samples_per_sec,train_step_time,train_batch_size,train_img_size,param_count\ntinynet_e,10001.12,50.423,512,10"
  },
  {
    "path": "results/benchmark-train-amp-nhwc-pt112-cu113-rtx3090.csv",
    "chars": 38278,
    "preview": "model,train_samples_per_sec,train_step_time,train_batch_size,train_img_size,param_count\ntinynet_e,11915.85,41.681,512,10"
  },
  {
    "path": "results/generate_csv_results.py",
    "chars": 2769,
    "preview": "import numpy as np\nimport pandas as pd\n\n\nresults = {\n    'results-imagenet.csv': [\n        'results-imagenet-real.csv',\n"
  },
  {
    "path": "results/model_metadata-in1k.csv",
    "chars": 12072,
    "preview": "model,pretrain\nadv_inception_v3,in1k-adv\nbat_resnext26ts,in1k\nbeit_base_patch16_224,in21k-selfsl\nbeit_base_patch16_384,i"
  },
  {
    "path": "results/results-imagenet-a-clean.csv",
    "chars": 121595,
    "preview": "model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation\neva02_large_patch14_448.mim_in22k_ft_in22k"
  },
  {
    "path": "results/results-imagenet-a.csv",
    "chars": 155354,
    "preview": "model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation,top1_diff,top5_diff,rank_diff\neva02_large_"
  },
  {
    "path": "results/results-imagenet-r-clean.csv",
    "chars": 121582,
    "preview": "model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation\neva02_large_patch14_448.mim_in22k_ft_in22k"
  },
  {
    "path": "results/results-imagenet-r.csv",
    "chars": 156142,
    "preview": "model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation,top1_diff,top5_diff,rank_diff\nvit_so400m_p"
  },
  {
    "path": "results/results-imagenet-real.csv",
    "chars": 150724,
    "preview": "model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation,top1_diff,top5_diff,rank_diff\neva02_large_"
  },
  {
    "path": "results/results-imagenet.csv",
    "chars": 123104,
    "preview": "model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation\neva02_large_patch14_448.mim_m38m_ft_in22k_"
  },
  {
    "path": "results/results-imagenetv2-matched-frequency.csv",
    "chars": 152771,
    "preview": "model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation,top1_diff,top5_diff,rank_diff\neva02_large_"
  },
  {
    "path": "results/results-sketch.csv",
    "chars": 156199,
    "preview": "model,img_size,top1,top1_err,top5,top5_err,param_count,crop_pct,interpolation,top1_diff,top5_diff,rank_diff\nvit_so400m_p"
  },
  {
    "path": "setup.cfg",
    "chars": 160,
    "preview": "[dist_conda]\n\nconda_name_differences = 'torch:pytorch'\nchannels = pytorch\nnoarch = True\n\n[metadata]\n\nurl = \"https://gith"
  },
  {
    "path": "tests/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "tests/test_checkpoint_loading.py",
    "chars": 3939,
    "preview": "import argparse\nimport inspect\n\nimport pytest\nimport torch\n\nfrom timm.models._helpers import load_state_dict, resume_che"
  },
  {
    "path": "tests/test_layers.py",
    "chars": 4175,
    "preview": "import pytest\nimport torch\nimport torch.nn as nn\n\nfrom timm.layers import create_act_layer, set_layer_config, get_act_la"
  },
  {
    "path": "tests/test_layers_drop.py",
    "chars": 10708,
    "preview": "\"\"\"Tests for timm.layers.drop module (DropBlock, DropPath).\"\"\"\nimport torch\nimport pytest\n\nfrom timm.layers.drop import "
  },
  {
    "path": "tests/test_layers_pool.py",
    "chars": 21153,
    "preview": "\"\"\"Tests for timm pooling layers.\"\"\"\nimport pytest\nimport torch\nimport torch.nn as nn\n\nimport importlib\nimport os\n\ntorch"
  },
  {
    "path": "tests/test_models.py",
    "chars": 33781,
    "preview": "\"\"\"Run tests for all models\n\nTests that run on CI should have a specific marker, e.g. @pytest.mark.base. This\nmarker is "
  },
  {
    "path": "tests/test_optim.py",
    "chars": 22029,
    "preview": "\"\"\" Optimzier Tests\n\nThese tests were adapted from PyTorch' optimizer tests.\n\n\"\"\"\nimport functools\nimport importlib\nimpo"
  },
  {
    "path": "tests/test_scheduler.py",
    "chars": 18291,
    "preview": "\"\"\" Scheduler Tests\n\nTests for learning rate schedulers in timm.scheduler.\n\"\"\"\nimport math\nimport pytest\nimport torch\nfr"
  },
  {
    "path": "tests/test_utils.py",
    "chars": 5944,
    "preview": "from torch.nn.modules.batchnorm import BatchNorm2d\nfrom torchvision.ops.misc import FrozenBatchNorm2d\n\nimport timm\nimpor"
  },
  {
    "path": "timm/__init__.py",
    "chars": 604,
    "preview": "from .version import __version__ as __version__\nfrom .layers import (\n    is_scriptable as is_scriptable,\n    is_exporta"
  },
  {
    "path": "timm/data/__init__.py",
    "chars": 1241,
    "preview": "from .auto_augment import RandAugment, AutoAugment, rand_augment_ops, auto_augment_policy,\\\n    rand_augment_transform, "
  },
  {
    "path": "timm/data/_info/imagenet12k_synsets.txt",
    "chars": 118210,
    "preview": "n00005787\nn00006484\nn00007846\nn00015388\nn00017222\nn00021265\nn00021939\nn00120010\nn00141669\nn00288000\nn00288384\nn00324978\n"
  },
  {
    "path": "timm/data/_info/imagenet21k_goog_synsets.txt",
    "chars": 218430,
    "preview": "n00004475\nn00005787\nn00006024\nn00006484\nn00007846\nn00015388\nn00017222\nn00021265\nn00021939\nn00120010\nn00141669\nn00288000\n"
  },
  {
    "path": "timm/data/_info/imagenet21k_goog_to_12k_indices.txt",
    "chars": 64070,
    "preview": "1\n3\n4\n5\n6\n7\n8\n9\n10\n11\n13\n14\n15\n16\n17\n18\n19\n20\n21\n23\n24\n26\n27\n28\n29\n30\n31\n32\n33\n34\n37\n38\n41\n43\n44\n45\n46\n47\n48\n49\n50\n51\n53"
  },
  {
    "path": "timm/data/_info/imagenet21k_goog_to_22k_indices.txt",
    "chars": 119937,
    "preview": "0\n1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15\n16\n17\n18\n19\n20\n21\n22\n23\n24\n25\n26\n27\n28\n29\n30\n31\n32\n33\n34\n35\n36\n37\n38\n39\n40\n41\n42\n4"
  },
  {
    "path": "timm/data/_info/imagenet21k_miil_synsets.txt",
    "chars": 112210,
    "preview": "n00005787\nn00006484\nn00007846\nn00015388\nn00017222\nn00021265\nn00021939\nn00120010\nn00141669\nn00288000\nn00288384\nn00324978\n"
  },
  {
    "path": "timm/data/_info/imagenet21k_miil_w21_synsets.txt",
    "chars": 104500,
    "preview": "n00005787\nn00006484\nn00007846\nn00015388\nn00017222\nn00021265\nn00021939\nn00120010\nn00141669\nn00288000\nn00288384\nn00324978\n"
  },
  {
    "path": "timm/data/_info/imagenet22k_ms_synsets.txt",
    "chars": 218420,
    "preview": "n01440764\nn01443537\nn01484850\nn01491361\nn01494475\nn01496331\nn01498041\nn01514668\nn01514859\nn01518878\nn01530575\nn01531178\n"
  },
  {
    "path": "timm/data/_info/imagenet22k_ms_to_12k_indices.txt",
    "chars": 63625,
    "preview": "1001\n1003\n1004\n1005\n1006\n1007\n1008\n1009\n1010\n1011\n1013\n1014\n1015\n1016\n1017\n1018\n1019\n1020\n1021\n1023\n1024\n1026\n1027\n1028\n"
  },
  {
    "path": "timm/data/_info/imagenet22k_ms_to_22k_indices.txt",
    "chars": 119938,
    "preview": "1000\n1001\n1002\n1003\n1004\n1005\n1006\n1007\n1008\n1009\n1010\n1011\n1012\n1013\n1014\n1015\n1016\n1017\n1018\n1019\n1020\n1021\n1022\n1023\n"
  },
  {
    "path": "timm/data/_info/imagenet22k_synsets.txt",
    "chars": 218410,
    "preview": "n00004475\nn00005787\nn00006024\nn00006484\nn00007846\nn00015388\nn00017222\nn00021265\nn00021939\nn00120010\nn00141669\nn00288000\n"
  },
  {
    "path": "timm/data/_info/imagenet22k_to_12k_indices.txt",
    "chars": 64070,
    "preview": "1\n3\n4\n5\n6\n7\n8\n9\n10\n11\n13\n14\n15\n16\n17\n18\n19\n20\n21\n23\n24\n26\n27\n28\n29\n30\n31\n32\n33\n34\n37\n38\n41\n43\n44\n45\n46\n47\n48\n49\n50\n51\n53"
  },
  {
    "path": "timm/data/_info/imagenet_a_indices.txt",
    "chars": 774,
    "preview": "6\n11\n13\n15\n17\n22\n23\n27\n30\n37\n39\n42\n47\n50\n57\n70\n71\n76\n79\n89\n90\n94\n96\n97\n99\n105\n107\n108\n110\n113\n124\n125\n130\n132\n143\n144\n15"
  },
  {
    "path": "timm/data/_info/imagenet_a_synsets.txt",
    "chars": 2000,
    "preview": "n01498041\nn01531178\nn01534433\nn01558993\nn01580077\nn01614925\nn01616318\nn01631663\nn01641577\nn01669191\nn01677366\nn01687978\n"
  },
  {
    "path": "timm/data/_info/imagenet_r_indices.txt",
    "chars": 769,
    "preview": "1\n2\n4\n6\n8\n9\n11\n13\n22\n23\n26\n29\n31\n39\n47\n63\n71\n76\n79\n84\n90\n94\n96\n97\n99\n100\n105\n107\n113\n122\n125\n130\n132\n144\n145\n147\n148\n150"
  },
  {
    "path": "timm/data/_info/imagenet_r_synsets.txt",
    "chars": 2000,
    "preview": "n01443537\nn01484850\nn01494475\nn01498041\nn01514859\nn01518878\nn01531178\nn01534433\nn01614925\nn01616318\nn01630670\nn01632777\n"
  },
  {
    "path": "timm/data/_info/imagenet_real_labels.json",
    "chars": 388478,
    "preview": "[[], [970, 795], [230, 231], [809], [516, 850], [57], [334], [700], [674], [332], [109], [286], [370], [757], [595], [14"
  },
  {
    "path": "timm/data/_info/imagenet_synset_to_definition.txt",
    "chars": 1748917,
    "preview": "n00004475\ta living thing that has (or can develop) the ability to act or function independently\nn00005787\torganisms (pla"
  },
  {
    "path": "timm/data/_info/imagenet_synset_to_lemma.txt",
    "chars": 741457,
    "preview": "n00004475\torganism, being\nn00005787\tbenthos\nn00006024\theterotroph\nn00006484\tcell\nn00007846\tperson, individual, someone, "
  },
  {
    "path": "timm/data/_info/imagenet_synsets.txt",
    "chars": 10000,
    "preview": "n01440764\nn01443537\nn01484850\nn01491361\nn01494475\nn01496331\nn01498041\nn01514668\nn01514859\nn01518878\nn01530575\nn01531178\n"
  },
  {
    "path": "timm/data/_info/mini_imagenet_indices.txt",
    "chars": 393,
    "preview": "12\n15\n51\n64\n70\n96\n99\n107\n111\n121\n149\n166\n173\n176\n207\n214\n228\n242\n244\n245\n249\n251\n256\n266\n270\n275\n279\n291\n299\n301\n306\n310"
  },
  {
    "path": "timm/data/_info/mini_imagenet_synsets.txt",
    "chars": 1000,
    "preview": "n01532829\nn01558993\nn01704323\nn01749939\nn01770081\nn01843383\nn01855672\nn01910747\nn01930112\nn01981276\nn02074367\nn02089867\n"
  },
  {
    "path": "timm/data/auto_augment.py",
    "chars": 35599,
    "preview": "\"\"\" AutoAugment, RandAugment, AugMix, and 3-Augment for PyTorch\n\nThis code implements the searched ImageNet policies wit"
  },
  {
    "path": "timm/data/config.py",
    "chars": 4616,
    "preview": "import logging\nfrom .constants import *\n\n\n_logger = logging.getLogger(__name__)\n\n\ndef resolve_data_config(\n        args="
  },
  {
    "path": "timm/data/constants.py",
    "chars": 442,
    "preview": "DEFAULT_CROP_PCT = 0.875\nDEFAULT_CROP_MODE = 'center'\nIMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)\nIMAGENET_DEFAULT_STD"
  },
  {
    "path": "timm/data/dataset.py",
    "chars": 6527,
    "preview": "\"\"\" Quick n Simple Image Folder, Tarfile based DataSet\n\nHacked together by / Copyright 2019, Ross Wightman\n\"\"\"\nimport io"
  },
  {
    "path": "timm/data/dataset_factory.py",
    "chars": 8627,
    "preview": "\"\"\" Dataset Factory\n\nHacked together by / Copyright 2021, Ross Wightman\n\"\"\"\nimport os\nfrom typing import Optional\n\nfrom "
  },
  {
    "path": "timm/data/dataset_info.py",
    "chars": 2391,
    "preview": "from abc import ABC, abstractmethod\nfrom typing import Dict, List, Optional, Union\n\n\nclass DatasetInfo(ABC):\n\n    def __"
  },
  {
    "path": "timm/data/distributed_sampler.py",
    "chars": 5540,
    "preview": "import math\nimport torch\nfrom torch.utils.data import Sampler\nimport torch.distributed as dist\n\n\nclass OrderedDistribute"
  },
  {
    "path": "timm/data/imagenet_info.py",
    "chars": 4167,
    "preview": "import csv\nimport os\nimport pkgutil\nimport re\nfrom typing import Dict, List, Optional, Union\n\nfrom .dataset_info import "
  },
  {
    "path": "timm/data/loader.py",
    "chars": 16268,
    "preview": "\"\"\" Loader Factory, Fast Collate, CUDA Prefetcher\n\nPrefetcher and Fast Collate inspired by NVIDIA APEX example at\nhttps:"
  },
  {
    "path": "timm/data/mixup.py",
    "chars": 16078,
    "preview": "\"\"\" Mixup and Cutmix\n\nPapers:\nmixup: Beyond Empirical Risk Minimization (https://arxiv.org/abs/1710.09412)\n\nCutMix: Regu"
  },
  {
    "path": "timm/data/naflex_dataset.py",
    "chars": 24473,
    "preview": "\"\"\" Dynamic Sequence Length Datasets for Variable Resolution Image Processing\n\nImplements two dataset wrappers:\n1. NaFle"
  },
  {
    "path": "timm/data/naflex_loader.py",
    "chars": 15703,
    "preview": "\"\"\"NaFlex data loader for dynamic sequence length training.\n\nThis module provides a specialized data loader for Vision T"
  },
  {
    "path": "timm/data/naflex_mixup.py",
    "chars": 8968,
    "preview": "\"\"\"Variable‑size Mixup / CutMix utilities for NaFlex data loaders.\n\nThis module provides:\n\n* `mix_batch_variable_size` –"
  },
  {
    "path": "timm/data/naflex_random_erasing.py",
    "chars": 14281,
    "preview": "\"\"\"Patch-level random erasing augmentation for NaFlex Vision Transformers.\n\nThis module implements random erasing specif"
  },
  {
    "path": "timm/data/naflex_transforms.py",
    "chars": 32533,
    "preview": "\"\"\" NaFlex (NaViT + FlexiViT) Transforms and Collation\n\nImplements PyTorch versions of the transforms described in the N"
  },
  {
    "path": "timm/data/random_erasing.py",
    "chars": 4964,
    "preview": "\"\"\" Random Erasing (Cutout)\n\nOriginally inspired by impl at https://github.com/zhunzhong07/Random-Erasing, Apache 2.0\nCo"
  },
  {
    "path": "timm/data/readers/__init__.py",
    "chars": 72,
    "preview": "from .reader_factory import create_reader\nfrom .img_extensions import *\n"
  },
  {
    "path": "timm/data/readers/class_map.py",
    "chars": 895,
    "preview": "import os\nimport pickle\n\n\ndef load_class_map(map_or_filename, root=''):\n    if isinstance(map_or_filename, dict):\n      "
  },
  {
    "path": "timm/data/readers/img_extensions.py",
    "chars": 1482,
    "preview": "from copy import deepcopy\n\n__all__ = ['get_img_extensions', 'is_img_extension', 'set_img_extensions', 'add_img_extension"
  },
  {
    "path": "timm/data/readers/reader.py",
    "chars": 487,
    "preview": "from abc import abstractmethod\n\n\nclass Reader:\n    def __init__(self):\n        pass\n\n    @abstractmethod\n    def _filena"
  },
  {
    "path": "timm/data/readers/reader_factory.py",
    "chars": 1918,
    "preview": "import os\nfrom typing import Optional\n\nfrom .reader_image_folder import ReaderImageFolder\nfrom .reader_image_in_tar impo"
  },
  {
    "path": "timm/data/readers/reader_hfds.py",
    "chars": 3262,
    "preview": "\"\"\" Dataset reader that wraps Hugging Face datasets\n\nHacked together by / Copyright 2022 Ross Wightman\n\"\"\"\nimport io\nimp"
  }
]

// ... and 263 more files (download for full content)

About this extraction

This page contains the full source code of the huggingface/pytorch-image-models GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 463 files (12.3 MB), approximately 3.3M tokens, and a symbol index with 6238 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!