Repository: frgfm/torch-scan Branch: main Commit: 56246d9de511 Files: 57 Total size: 169.0 KB Directory structure: gitextract_xv6br66t/ ├── .conda/ │ └── meta.yaml ├── .github/ │ ├── FUNDING.yml │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── config.yml │ │ └── feature_request.yml │ ├── PULL_REQUEST_TEMPLATE.md │ ├── collect_env.py │ ├── dependabot.yml │ ├── labeler.yml │ ├── release.yml │ ├── verify_labels.py │ └── workflows/ │ ├── builds.yml │ ├── doc-status.yml │ ├── docs.yml │ ├── pr-labels.yml │ ├── publish.yml │ ├── pull_requests.yml │ ├── style.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── README.md ├── docs/ │ ├── Makefile │ ├── README.md │ ├── build.sh │ ├── make.bat │ └── source/ │ ├── _static/ │ │ ├── css/ │ │ │ └── custom.css │ │ └── js/ │ │ └── custom.js │ ├── changelog.rst │ ├── conf.py │ ├── index.rst │ ├── installing.rst │ ├── modules.rst │ ├── process.rst │ ├── torchscan.rst │ └── utils.rst ├── pyproject.toml ├── scripts/ │ └── benchmark.py ├── setup.py ├── tests/ │ ├── test_crawler.py │ ├── test_modules.py │ ├── test_process.py │ └── test_utils.py └── torchscan/ ├── __init__.py ├── crawler.py ├── modules/ │ ├── __init__.py │ ├── flops.py │ ├── macs.py │ ├── memory.py │ └── receptive.py ├── process/ │ ├── __init__.py │ └── memory.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .conda/meta.yaml ================================================ {% set pyproject = load_file_data('../pyproject.toml', from_recipe_dir=True) %} {% set project = pyproject.get('project') %} {% set urls = pyproject.get('project', {}).get('urls') %} {% set version = environ.get('BUILD_VERSION', '0.2.0.dev0') %} package: name: {{ project.get('name') }} version: {{ version }} source: fn: {{ project.get('name') }}-{{ version }}.tar.gz url: ../dist/{{ project.get('name') }}-{{ version }}.tar.gz build: noarch: python script: python setup.py install --single-version-externally-managed --record=record.txt requirements: host: - python>=3.8, <4.0 - setuptools run: - pytorch >=2.0.0, <3.0.0 test: # Python imports imports: - torchscan - torchscan.modules - torchscan.process - torchscan.utils requires: - python about: home: {{ urls.get('repository') }} license: Apache 2.0 license_file: {{ project.get('license', {}).get('file') }} summary: {{ project.get('description') }} # description: | # {{ data['long_description'] | replace("\n", "\n ") | replace("#", '\#')}} doc_url: {{ urls.get('documentation') }} dev_url: {{ urls.get('repository') }} ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: frgfm patreon: # Replace with a single Patreon username open_collective: # Replace with an OpenCollective account ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml
================================================ name: 🐛 Bug report description: Create a report to help us improve the library labels: 'type: bug' assignees: frgfm body: - type: markdown attributes: value: > #### Before reporting a bug, please check that the issue hasn't already been addressed in [the existing and past issues](https://github.com/frgfm/torch-scan/issues?q=is%3Aissue). - type: textarea attributes: label: Bug description description: | A clear and concise description of what the bug is. Please explain the result you observed and the behavior you were expecting. placeholder: | A clear and concise description of what the bug is. validations: required: true - type: textarea attributes: label: Code snippet to reproduce the bug description: | Sample code to reproduce the problem. Please wrap your code snippet with ```` ```triple quotes blocks``` ```` for readability. placeholder: | ```python Sample code to reproduce the problem ``` validations: required: true - type: textarea attributes: label: Error traceback description: | The error message you received running the code snippet, with the full traceback. Please wrap your error message with ```` ```triple quotes blocks``` ```` for readability. placeholder: | ``` The error message you got, with the full traceback. ``` validations: required: true - type: textarea attributes: label: Environment description: | Please run the following command and paste the output below. ```sh wget https://raw.githubusercontent.com/frgfm/torch-scan/main/.github/collect_env.py # For security purposes, please check the contents of collect_env.py before running it. python collect_env.py ``` validations: required: true - type: markdown attributes: value: > Thanks for helping us improve the library! ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: true contact_links: - name: Usage questions url: https://github.com/frgfm/torch-scan/discussions about: Ask questions and discuss with other TorchScan community members ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: 🚀 Feature request description: Submit a proposal/request for a new feature labels: 'type: enhancement' assignees: frgfm body: - type: textarea attributes: label: 🚀 Feature description: > A clear and concise description of the feature proposal validations: required: true - type: textarea attributes: label: Motivation & pitch description: > Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too. validations: required: true - type: textarea attributes: label: Alternatives description: > A description of any alternative solutions or features you've considered, if any. - type: textarea attributes: label: Additional context description: > Add any other context or screenshots about the feature request. - type: markdown attributes: value: > Thanks for contributing 🎉 ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ # What does this PR do?
Closes # (issue) ## Before submitting - [ ] Was this discussed/approved in a Github [issue](https://github.com/frgfm/torch-scan/issues?q=is%3Aissue) or a [discussion](https://github.com/frgfm/torch-scan/discussions)? Please add a link to it if that's the case. - [ ] You have read the [contribution guidelines](https://github.com/frgfm/torch-scan/blob/main/CONTRIBUTING.md#submitting-a-pull-request) and followed them in this PR. - [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/frgfm/torch-scan/tree/main/docs). - [ ] Did you write any new necessary tests? ================================================ FILE: .github/collect_env.py ================================================ # Copyright (C) 2020-2024, François-Guillaume Fernandez. # This program is licensed under the Apache License 2.0. # See LICENSE or go to https://www.apache.org/licenses/LICENSE-2.0 for full license details. """ Based on https://github.com/pytorch/pytorch/blob/master/torch/utils/collect_env.py This script outputs relevant system environment info Run it with `python collect_env.py`. """ from __future__ import absolute_import, division, print_function, unicode_literals import locale import os import re import subprocess # noqa S404 import sys from pathlib import Path from typing import NamedTuple try: import torchscan TORCHSCAN_AVAILABLE = True except (ImportError, NameError, AttributeError, OSError): TORCHSCAN_AVAILABLE = False try: import torch TORCH_AVAILABLE = True except (ImportError, NameError, AttributeError, OSError): TORCH_AVAILABLE = False PY3 = sys.version_info >= (3, 0) # System Environment Information class SystemEnv(NamedTuple): torchscan_version: str torch_version: str os: str python_version: str is_cuda_available: bool cuda_runtime_version: str nvidia_driver_version: str nvidia_gpu_models: str cudnn_version: str def run(command): """Returns (return-code, stdout, stderr)""" p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) output, err = p.communicate() rc = p.returncode if PY3: enc = locale.getpreferredencoding() output = output.decode(enc) err = err.decode(enc) return rc, output.strip(), err.strip() def run_and_read_all(run_lambda, command): """Runs command using run_lambda; reads and returns entire output if rc is 0""" rc, out, _ = run_lambda(command) if rc != 0: return None return out def run_and_parse_first_match(run_lambda, command, regex): """Runs command using run_lambda, returns the first regex match if it exists""" rc, out, _ = run_lambda(command) if rc != 0: return None match = re.search(regex, out) if match is None: return None return match.group(1) def get_nvidia_driver_version(run_lambda): if get_platform() == "darwin": cmd = "kextstat | grep -i cuda" return run_and_parse_first_match(run_lambda, cmd, r"com[.]nvidia[.]CUDA [(](.*?)[)]") smi = get_nvidia_smi() return run_and_parse_first_match(run_lambda, smi, r"Driver Version: (.*?) 
") def get_gpu_info(run_lambda): if get_platform() == "darwin": if TORCH_AVAILABLE and torch.cuda.is_available(): return torch.cuda.get_device_name(None) return None smi = get_nvidia_smi() uuid_regex = re.compile(r" \(UUID: .+?\)") rc, out, _ = run_lambda(smi + " -L") if rc != 0: return None # Anonymize GPUs by removing their UUID return re.sub(uuid_regex, "", out) def get_running_cuda_version(run_lambda): return run_and_parse_first_match(run_lambda, "nvcc --version", r"release .+ V(.*)") def get_cudnn_version(run_lambda): """This will return a list of libcudnn.so; it's hard to tell which one is being used""" if get_platform() == "win32": cudnn_cmd = 'where /R "%CUDA_PATH%\\bin" cudnn*.dll' elif get_platform() == "darwin": # CUDA libraries and drivers can be found in /usr/local/cuda/. See # https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html#install # https://docs.nvidia.com/deeplearning/sdk/cudnn-install/index.html#installmac # Use CUDNN_LIBRARY when cudnn library is installed elsewhere. cudnn_cmd = "ls /usr/local/cuda/lib/libcudnn*" else: cudnn_cmd = 'ldconfig -p | grep libcudnn | rev | cut -d" " -f1 | rev' rc, out, _ = run_lambda(cudnn_cmd) # find will return 1 if there are permission errors or if not found if len(out) == 0 or rc not in (1, 0): lib = os.environ.get("CUDNN_LIBRARY") if lib is not None and Path(lib).is_file(): return os.path.realpath(lib) return None files = set() for fn in out.split("\n"): fn = os.path.realpath(fn) # eliminate symbolic links if Path(fn).is_file(): files.add(fn) if not files: return None # Alphabetize the result because the order is non-deterministic otherwise files = sorted(files) if len(files) == 1: return files[0] result = "\n".join(files) return "Probably one of the following:\n{}".format(result) def get_nvidia_smi(): # Note: nvidia-smi is currently available only on Windows and Linux smi = "nvidia-smi" if get_platform() == "win32": system_root = os.environ.get("SYSTEMROOT", "C:\\Windows") program_files_root = os.environ.get("PROGRAMFILES", "C:\\Program Files") legacy_path = Path(program_files_root) / "NVIDIA Corporation" / "NVSMI" / smi new_path = Path(system_root) / "System32" / smi smis = [new_path, legacy_path] for candidate_smi in smis: if Path(candidate_smi).exists(): smi = '"{}"'.format(candidate_smi) break return smi def get_platform(): if sys.platform.startswith("linux"): return "linux" if sys.platform.startswith("win32"): return "win32" if sys.platform.startswith("cygwin"): return "cygwin" if sys.platform.startswith("darwin"): return "darwin" return sys.platform def get_mac_version(run_lambda): return run_and_parse_first_match(run_lambda, "sw_vers -productVersion", r"(.*)") def get_windows_version(run_lambda): return run_and_read_all(run_lambda, "wmic os get Caption | findstr /v Caption") def get_lsb_version(run_lambda): return run_and_parse_first_match(run_lambda, "lsb_release -a", r"Description:\t(.*)") def check_release_file(run_lambda): return run_and_parse_first_match(run_lambda, "cat /etc/*-release", r'PRETTY_NAME="(.*)"') def get_os(run_lambda): platform = get_platform() if platform in ("win32", "cygwin"): return get_windows_version(run_lambda) if platform == "darwin": version = get_mac_version(run_lambda) if version is None: return None return "Mac OSX {}".format(version) if platform == "linux": # Ubuntu/Debian based desc = get_lsb_version(run_lambda) if desc is not None: return desc # Try reading /etc/*-release desc = check_release_file(run_lambda) if desc is not None: return desc return platform # Unknown 
platform return platform def get_env_info(): run_lambda = run torchscan_str = torchscan.__version__ if TORCHSCAN_AVAILABLE else "N/A" if TORCH_AVAILABLE: torch_str = torch.__version__ cuda_available_str = torch.cuda.is_available() else: torch_str = cuda_available_str = "N/A" return SystemEnv( torchscan_version=torchscan_str, torch_version=torch_str, python_version=".".join(map(str, sys.version_info[:3])), is_cuda_available=cuda_available_str, cuda_runtime_version=get_running_cuda_version(run_lambda), nvidia_gpu_models=get_gpu_info(run_lambda), nvidia_driver_version=get_nvidia_driver_version(run_lambda), cudnn_version=get_cudnn_version(run_lambda), os=get_os(run_lambda), ) env_info_fmt = """ TorchScan version: {torchscan_version} PyTorch version: {torch_version} OS: {os} Python version: {python_version} Is CUDA available: {is_cuda_available} CUDA runtime version: {cuda_runtime_version} GPU models and configuration: {nvidia_gpu_models} Nvidia driver version: {nvidia_driver_version} cuDNN version: {cudnn_version} """.strip() def pretty_str(envinfo): def replace_nones(dct, replacement="Could not collect"): for key in dct: if dct[key] is not None: continue dct[key] = replacement return dct def replace_bools(dct, true="Yes", false="No"): for key in dct: if dct[key] is True: dct[key] = true elif dct[key] is False: dct[key] = false return dct def maybe_start_on_next_line(string): # If `string` is multiline, prepend a \n to it. if string is not None and len(string.split("\n")) > 1: return "\n{}\n".format(string) return string mutable_dict = envinfo._asdict() # If nvidia_gpu_models is multiline, start on the next line mutable_dict["nvidia_gpu_models"] = maybe_start_on_next_line(envinfo.nvidia_gpu_models) # If the machine doesn't have CUDA, report some fields as 'No CUDA' dynamic_cuda_fields = [ "cuda_runtime_version", "nvidia_gpu_models", "nvidia_driver_version", ] all_cuda_fields = [*dynamic_cuda_fields, "cudnn_version"] all_dynamic_cuda_fields_missing = all(mutable_dict[field] is None for field in dynamic_cuda_fields) if TORCH_AVAILABLE and not torch.cuda.is_available() and all_dynamic_cuda_fields_missing: for field in all_cuda_fields: mutable_dict[field] = "No CUDA" # Replace True with Yes, False with No mutable_dict = replace_bools(mutable_dict) # Replace all None objects with 'Could not collect' mutable_dict = replace_nones(mutable_dict) return env_info_fmt.format(**mutable_dict) def get_pretty_env_info(): """Collects environment information for debugging purposes Returns: str: environment information """ return pretty_str(get_env_info()) def main(): print("Collecting environment information...") output = get_pretty_env_info() print(output) if __name__ == "__main__": main() ================================================ FILE: .github/dependabot.yml ================================================ # To get started with Dependabot version updates, you'll need to specify which # package ecosystems to update and where the package manifests are located. 
# Please see the documentation for all configuration options: # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "monthly" time: "06:00" timezone: "Europe/Paris" groups: gh-actions: patterns: - "*" reviewers: - "frgfm" assignees: - "frgfm" - package-ecosystem: "pip" directory: "/" schedule: interval: "daily" time: "06:00" timezone: "Europe/Paris" reviewers: - "frgfm" assignees: - "frgfm" allow: - dependency-name: "ruff" - dependency-name: "mypy" - dependency-name: "pre-commit" ================================================ FILE: .github/labeler.yml ================================================ 'module: crawler': - changed-files: - any-glob-to-any-file: torchscan/crawler.py 'module: modules': - changed-files: - any-glob-to-any-file: torchscan/modules/* 'module: process': - changed-files: - any-glob-to-any-file: torchscan/process/* 'module: utils': - changed-files: - any-glob-to-any-file: torchscan/utils.py 'ext: docs': - changed-files: - any-glob-to-any-file: docs/* 'ext: scripts': - changed-files: - any-glob-to-any-file: scripts/* 'ext: tests': - changed-files: - any-glob-to-any-file: tests/* 'topic: ci': - changed-files: - any-glob-to-any-file: .github/* 'topic: docs': - changed-files: - any-glob-to-any-file: - README.md - CONTRIBUTING.md - CODE_OF_CONDUCT.md - CITATION.cff - LICENSE 'topic: build': - changed-files: - any-glob-to-any-file: - setup.py - pyproject.toml 'topic: style': - changed-files: - any-glob-to-any-file: .pre-commit-config.yaml ================================================ FILE: .github/release.yml ================================================ changelog: exclude: labels: - ignore-for-release categories: - title: Breaking Changes 🛠 labels: - "type: breaking change" # NEW FEATURES - title: New Features 🚀 labels: - "type: feat" # BUG FIXES - title: Bug Fixes 🐛 labels: - "type: fix" # IMPROVEMENTS - title: Improvements labels: - "type: improvement" # MISC - title: Miscellaneous labels: - "type: misc" ================================================ FILE: .github/verify_labels.py ================================================ # Copyright (C) 2022-2024, François-Guillaume Fernandez. # This program is licensed under the Apache License 2.0. # See LICENSE or go to https://www.apache.org/licenses/LICENSE-2.0 for full license details. """ Borrowed & adapted from https://github.com/pytorch/vision/blob/main/.github/process_commit.py This script finds the merger responsible for labeling a PR by a commit SHA. It is used by the workflow in '.github/workflows/pr-labels.yml'. If there exists no PR associated with the commit or the PR is properly labeled, this script is a no-op. Note: we ping the merger only, not the reviewers, as the reviewers can sometimes be external to the project with no labeling responsibility, so we don't want to bother them.
""" from typing import Any, Set, Tuple import requests # For a PR to be properly labeled it should have one primary label and one secondary label # Should specify the type of change PRIMARY_LABELS = { "type: new feature", "type: bug", "type: enhancement", "type: misc", } # Should specify what has been modified SECONDARY_LABELS = { "topic: documentation", "module: modules", "module: process", "module: crawler", "module: utils", "ext: docs", "ext: scripts", "ext: tests", "topic: build", "topic: ci", } GH_ORG = "frgfm" GH_REPO = "torch-scan" def query_repo(cmd: str, *, accept) -> Any: response = requests.get( f"https://api.github.com/repos/{GH_ORG}/{GH_REPO}/{cmd}", headers={"Accept": accept}, timeout=5 ) return response.json() def get_pr_merger_and_labels(pr_number: int) -> Tuple[str, Set[str]]: # See https://docs.github.com/en/rest/reference/pulls#get-a-pull-request data = query_repo(f"pulls/{pr_number}", accept="application/vnd.github.v3+json") merger = data.get("merged_by", {}).get("login") labels = {label["name"] for label in data["labels"]} return merger, labels def main(args): merger, labels = get_pr_merger_and_labels(args.pr) is_properly_labeled = bool(PRIMARY_LABELS.intersection(labels) and SECONDARY_LABELS.intersection(labels)) if isinstance(merger, str) and not is_properly_labeled: print(f"@{merger}") def parse_args(): import argparse parser = argparse.ArgumentParser( description="PR label checker", formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument("pr", type=int, help="PR number") return parser.parse_args() if __name__ == "__main__": args = parse_args() main(args) ================================================ FILE: .github/workflows/builds.yml ================================================ name: builds on: push: branches: main pull_request: branches: main jobs: build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] python: [3.8, 3.9, '3.10', 3.11, 3.12] exclude: - os: macos-latest python: 3.8 - os: macos-latest python: 3.9 - os: macos-latest python: '3.10' steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x64 - name: Install package run: | python -m pip install --upgrade uv uv pip install --system -e . 
- name: Import package run: python -c "import torchscan; print(torchscan.__version__)" pypi: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.11 architecture: x64 - name: Install dependencies run: | python -m pip install --upgrade uv uv pip install --system setuptools wheel twine --upgrade - run: | python setup.py sdist bdist_wheel twine check dist/* conda: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: "3.11" - name: Install dependencies shell: bash -el {0} run: conda install -y conda-build conda-verify - name: Build conda shell: bash -el {0} run: | python setup.py sdist mkdir conda-dist conda env list conda build .conda/ -c pytorch --output-folder conda-dist ls -l conda-dist/noarch/*tar.bz2 ================================================ FILE: .github/workflows/doc-status.yml ================================================ name: GH-Pages Status on: page_build jobs: see-page-build-payload: runs-on: ubuntu-latest steps: - uses: actions/setup-python@v5 with: python-version: 3.11 architecture: x64 - name: check status run: | import os status, errormsg = os.getenv('STATUS'), os.getenv('ERROR') if status != 'built': raise AssertionError(f"There was an error building the page on GitHub pages.\n\nStatus: {status}\n\nError message: {errormsg}") shell: python env: STATUS: ${{ github.event.build.status }} ERROR: ${{ github.event.build.error.message }} ================================================ FILE: .github/workflows/docs.yml ================================================ name: docs on: push: branches: main jobs: docs-deploy: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: [3.9] steps: - uses: actions/checkout@v4 with: persist-credentials: false - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x64 - name: Install dependencies run: | python -m pip install --upgrade uv uv pip install --system -e ".[docs]" - name: Build documentation run: cd docs && bash build.sh - name: Documentation sanity check run: test -e docs/build/index.html || exit - name: Install SSH Client 🔑 uses: webfactory/ssh-agent@v0.9.0 with: ssh-private-key: ${{ secrets.SSH_DEPLOY_KEY }} - name: Deploy to Github Pages uses: JamesIves/github-pages-deploy-action@v4 with: BRANCH: gh-pages FOLDER: 'docs/build' COMMIT_MESSAGE: '[skip ci] Documentation updates' CLEAN: true SSH: true ================================================ FILE: .github/workflows/pr-labels.yml ================================================ name: pr-labels on: pull_request: branches: main types: closed jobs: is-properly-labeled: if: github.event.pull_request.merged == true runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - name: Install requests run: pip install requests - name: Process commit and find merger responsible for labeling id: commit run: echo "::set-output name=merger::$(python .github/verify_labels.py ${{ github.event.pull_request.number }})" - name: Comment PR uses: actions/github-script@v7.0.1 if: ${{ steps.commit.outputs.merger != '' }} with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | const { issue: { number: issue_number }, repo: { owner, repo } } = context; github.rest.issues.createComment({ issue_number, owner, repo, body: 'Hey ${{ steps.commit.outputs.merger }} 👋\nYou merged this PR, but it is not correctly labeled. 
The list of valid labels is available at https://github.com/frgfm/torch-scan/blob/main/.github/verify_labels.py' }); ================================================ FILE: .github/workflows/publish.yml ================================================ name: publish on: release: types: [published] jobs: pypi: if: "!github.event.release.prerelease" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.11 architecture: x64 - name: Install dependencies run: | python -m pip install --upgrade uv uv pip install --system setuptools wheel twine --upgrade - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | echo "BUILD_VERSION=$(echo "${GITHUB_REF#refs/*/}" | cut -c 2-)" >> $GITHUB_ENV python setup.py sdist bdist_wheel twine check dist/* twine upload dist/* pypi-check: if: "!github.event.release.prerelease" runs-on: ubuntu-latest needs: pypi steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.11 architecture: x64 - name: Install package run: | python -m pip install --upgrade uv uv pip install --system torchscan python -c "import torchscan; print(torchscan.__version__)" conda: if: "!github.event.release.prerelease" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Miniconda setup uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: 3.11 - name: Install dependencies shell: bash -el {0} run: conda install -y conda-build conda-verify anaconda-client - name: Build and publish shell: bash -el {0} env: ANACONDA_API_TOKEN: ${{ secrets.ANACONDA_TOKEN }} run: | echo "BUILD_VERSION=$(echo "${GITHUB_REF#refs/*/}" | cut -c 2-)" >> $GITHUB_ENV python setup.py sdist mkdir conda-dist conda build .conda/ -c pytorch --output-folder conda-dist ls -l conda-dist/noarch/*tar.bz2 anaconda upload conda-dist/noarch/*tar.bz2 conda-check: if: "!github.event.release.prerelease" runs-on: ubuntu-latest needs: conda steps: - name: Miniconda setup uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: 3.11 auto-activate-base: true - name: Install package shell: bash -el {0} run: | conda install -c frgfm torchscan python -c "import torchscan; print(torchscan.__version__)" ================================================ FILE: .github/workflows/pull_requests.yml ================================================ name: pull_requests on: pull_request: branches: main jobs: docs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.9 architecture: x64 - name: Install dependencies run: | python -m pip install --upgrade uv uv pip install --system -e ".[docs]" - name: Build documentation run: cd docs && bash build.sh - name: Documentation sanity check run: test -e docs/build/index.html || exit triage: permissions: contents: read pull-requests: write runs-on: ubuntu-latest steps: - uses: actions/labeler@v5 with: repo-token: "${{ secrets.GITHUB_TOKEN }}" ================================================ FILE: .github/workflows/style.yml ================================================ name: style on: push: branches: main pull_request: branches: main jobs: ruff: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: [3.11] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x64 - name: Run ruff run: | python -m pip install --upgrade uv uv pip install --system -e 
'.[quality]' ruff --version ruff check --diff . mypy: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: [3.11] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x64 - name: Run mypy run: | python -m pip install --upgrade uv uv pip install --system -e '.[quality]' mypy --version mypy ruff-format: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: [3.11] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x64 - name: Run ruff run: | python -m pip install --upgrade uv uv pip install --system -e '.[quality]' ruff --version ruff format --check --diff . precommit-hooks: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: [3.11] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x64 - name: Run pre-commit hooks run: | python -m pip install --upgrade uv uv pip install --system -e '.[quality]' git checkout -b temp pre-commit install pre-commit --version pre-commit run --all-files ================================================ FILE: .github/workflows/tests.yml ================================================ name: tests on: push: branches: main pull_request: branches: main jobs: pytest: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] python: [3.11] steps: - uses: actions/checkout@v4 with: persist-credentials: false - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x64 - name: Install dependencies run: | python -m pip install --upgrade uv uv pip install --system -e ".[test]" --upgrade - name: Run unittests run: pytest --cov=torchscan --cov-report xml tests/ - uses: actions/upload-artifact@v4 with: name: coverage-reports path: ./coverage.xml codecov-upload: runs-on: ubuntu-latest needs: pytest steps: - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 - name: Upload coverage to Codecov uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} flags: unittests directory: ./coverage-reports fail_ci_if_error: true headers: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest] steps: - uses: actions/checkout@v4 with: persist-credentials: false - name: Check the headers uses: frgfm/validate-python-headers@main with: license: 'Apache-2.0' owner: 'François-Guillaume Fernandez' starting-year: 2020 folders: 'torchscan,scripts,docs,.github' ignores: 'version.py,__init__.py' ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ conda-dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # Package version torchscan/version.py # Conda distribution conda-dist/ ================================================ FILE: .pre-commit-config.yaml ================================================ default_language_version: python: python3.11 repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 hooks: - id: check-added-large-files - id: check-ast - id: check-case-conflict - id: check-json - id: check-merge-conflict - id: check-symlinks - id: check-toml - id: check-xml - id: check-yaml exclude: .conda - id: debug-statements language_version: python3 - id: end-of-file-fixer - id: no-commit-to-branch args: ['--branch', 'main'] - id: requirements-txt-fixer - id: trailing-whitespace - repo: https://github.com/charliermarsh/ruff-pre-commit rev: 'v0.6.4' hooks: - id: ruff args: - --fix - id: ruff-format ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
## Our Standards Examples of behavior that contributes to a positive environment for our community include: * Demonstrating empathy and kindness toward other people * Being respectful of differing opinions, viewpoints, and experiences * Giving and gracefully accepting constructive feedback * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience * Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: * The use of sexualized language or imagery, and sexual attention or advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or email address, without their explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. ## Scope This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. ## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at fg-feedback@protonmail.com. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident. ## Enforcement Guidelines Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: ### 1. Correction **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. ### 2. Warning **Community Impact**: A violation through a single incident or series of actions. **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. ### 3. Temporary Ban **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. 
No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. ### 4. Permanent Ban **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. **Consequence**: A permanent ban from any sort of public interaction within the community. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. ================================================ FILE: CONTRIBUTING.md ================================================ # Contributing to torchscan Everything you need to know to contribute efficiently to the project. However you wish to contribute to the project, please respect the [code of conduct](CODE_OF_CONDUCT.md). ## Codebase structure - [torchscan](https://github.com/frgfm/torch-scan/blob/main/torchscan) - The actual torchscan library - [tests](https://github.com/frgfm/torch-scan/blob/main/tests) - Python unit tests - [docs](https://github.com/frgfm/torch-scan/blob/main/docs) - Sphinx documentation building - [scripts](https://github.com/frgfm/torch-scan/blob/main/scripts) - Example and utility scripts ## Continuous Integration This project uses the following integrations to ensure proper codebase maintenance: - [GitHub Workflow](https://help.github.com/en/actions/configuring-and-managing-workflows/configuring-a-workflow) - runs jobs for package build and coverage - [Codacy](https://www.codacy.com/) - analyzes commits for code quality - [Codecov](https://codecov.io/) - reports back coverage results As a contributor, you will only have to ensure coverage of your code by adding appropriate unit tests. ## Feedback ### Feature requests & bug reports Whether you have encountered a problem or have a feature suggestion, your input has value and can be referenced by contributors in their developments. For this purpose, we advise you to use Github [issues](https://github.com/frgfm/torch-scan/issues). First, check whether the topic is already covered in an open / closed issue. If not, feel free to open a new one! When doing so, use issue templates whenever possible and provide enough information for other contributors to jump in. ### Questions If you are wondering how to do something with TorchScan, or have a more general question, you should consider checking out Github [discussions](https://github.com/frgfm/torch-scan/discussions). See it as a Q&A forum, or the TorchScan-specific StackOverflow! ## Submitting a Pull Request ### Preparing your local branch 1 - Fork this [repository](https://github.com/frgfm/torch-scan) by clicking on the "Fork" button at the top right of the page. This will create a copy of the project under your GitHub account (cf. [Fork a repo](https://docs.github.com/en/get-started/quickstart/fork-a-repo)).
2 - [Clone your fork](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) to your local disk and set the upstream to this repo ```shell git clone git@github.com:<YOUR_GITHUB_USERNAME>/torch-scan.git cd torch-scan git remote add upstream https://github.com/frgfm/torch-scan.git ``` 3 - You should not work on the `main` branch, so let's create a new one ```shell git checkout -b a-short-description ``` 4 - All that is left is to set up your development environment. First uninstall any existing installation of the library with `pip uninstall torchscan`, then: ```shell pip install -e ".[dev]" pre-commit install ``` ### Developing your feature #### Commits - **Code**: make sure to provide docstrings for your Python code. In doing so, please follow [Google-style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) to ease the documentation process later. - **Commit message**: please follow [Udacity guide](http://udacity.github.io/git-styleguide/) #### Unit tests In order to run the same unit tests as the CI workflows, you can run the unit tests locally: ```shell make test ``` #### Code quality The CI will also run some sanity checks (header format, dependency consistency, etc.), which you can run as follows: ```shell make quality ``` This will read `pyproject.toml` and run: - lint checking, formatting ([ruff](https://docs.astral.sh/ruff/)) - type annotation checking ([mypy](https://github.com/python/mypy)) You can automatically fix most of those by running: ```shell make style ``` ### Submit your modifications Push your latest modifications to your remote branch ```shell git push -u origin a-short-description ``` Then [open a Pull Request](https://docs.github.com/en/github/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) from your fork's branch. Follow the instructions of the Pull Request template and then click on "Create a pull request". ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: Makefile ================================================ # this target runs checks on all files quality: ruff format --check . ruff check . mypy # this target runs checks on all files and potentially modifies some of them style: ruff format . ruff check --fix . # Run tests for the library test: pytest --cov=torchscan tests/ # Build documentation for current version single-docs: sphinx-build docs/source docs/_build -a # Check that docs can build full-docs: cd docs && bash build.sh ================================================ FILE: README.md ================================================

*(Badges: CI status · ruff · test coverage · PyPI version · Conda version · supported Python versions · license · documentation status)*

The very useful [summary](https://www.tensorflow.org/api_docs/python/tf/keras/Model#summary) method of `tf.keras.Model`, but for PyTorch, with more useful information. ## Quick Tour ### Inspecting your PyTorch architecture Similar to the `torchsummary` implementation, `torchscan` brings useful module information into a readable format. For complex nested architectures, you can set a maximum display depth as follows: ```python from torchvision.models import densenet121 from torchscan import summary model = densenet121().eval().cuda() summary(model, (3, 224, 224), max_depth=2) ``` which would yield ```shell __________________________________________________________________________________________ Layer Type Output Shape Param # ========================================================================================== densenet DenseNet (-1, 1000) 0 ├─features Sequential (-1, 1024, 7, 7) 0 | └─conv0 Conv2d (-1, 64, 112, 112) 9,408 | └─norm0 BatchNorm2d (-1, 64, 112, 112) 257 | └─relu0 ReLU (-1, 64, 112, 112) 0 | └─pool0 MaxPool2d (-1, 64, 56, 56) 0 | └─denseblock1 _DenseBlock (-1, 256, 56, 56) 338,316 | └─transition1 _Transition (-1, 128, 28, 28) 33,793 | └─denseblock2 _DenseBlock (-1, 512, 28, 28) 930,072 | └─transition2 _Transition (-1, 256, 14, 14) 133,121 | └─denseblock3 _DenseBlock (-1, 1024, 14, 14) 2,873,904 | └─transition3 _Transition (-1, 512, 7, 7) 528,385 | └─denseblock4 _DenseBlock (-1, 1024, 7, 7) 2,186,272 | └─norm5 BatchNorm2d (-1, 1024, 7, 7) 4,097 ├─classifier Linear (-1, 1000) 1,025,000 ========================================================================================== Trainable params: 7,978,856 Non-trainable params: 0 Total params: 7,978,856 ------------------------------------------------------------------------------------------ Model size (params + buffers): 30.76 Mb Framework & CUDA overhead: 423.57 Mb Total RAM usage: 454.32 Mb ------------------------------------------------------------------------------------------ Floating Point Operations on forward: 5.74 GFLOPs Multiply-Accumulations on forward: 2.87 GMACs Direct memory accesses on forward: 2.90 GDMAs __________________________________________________________________________________________ ``` Results are aggregated to the selected depth for improved readability. For reference, here are explanations of a few acronyms: - **FLOPs**: floating-point operations (not to be confused with FLOPS which is FLOPs per second) - **MACs**: multiply-accumulate operations (cf. [wikipedia](https://en.wikipedia.org/wiki/Multiply%E2%80%93accumulate_operation)) - **DMAs**: direct memory accesses (many argue that they are more relevant than FLOPs or MACs for comparing model inference speeds, cf. [wikipedia](https://en.wikipedia.org/wiki/Direct_memory_access)) Additionally, for highway nets (models without multiple branches / skip connections), `torchscan` supports receptive field estimation. ```python from torchvision.models import vgg16 from torchscan import summary model = vgg16().eval().cuda() summary(model, (3, 224, 224), receptive_field=True, max_depth=0) ``` which will add the layer's receptive field (relative to the last convolutional layer) to the summary. ## Setup Python 3.8 (or newer) and [pip](https://pip.pypa.io/en/stable/)/[conda](https://docs.conda.io/en/latest/miniconda.html) are required to install Torchscan.
Additionally, for highway nets (models without multiple branches / skip connections), `torchscan` supports receptive field estimation.

```python
from torchvision.models import vgg16
from torchscan import summary

model = vgg16().eval().cuda()
summary(model, (3, 224, 224), receptive_field=True, max_depth=0)
```

which will add the layer's receptive field (relative to the last convolutional layer) to the summary.

## Setup

Python 3.8 (or newer) and [pip](https://pip.pypa.io/en/stable/)/[conda](https://docs.conda.io/en/latest/miniconda.html) are required to install Torchscan.

### Stable release

You can install the last stable release of the package using [pypi](https://pypi.org/project/torch-scan/) as follows:

```shell
pip install torchscan
```

or using [conda](https://anaconda.org/frgfm/torchscan):

```shell
conda install -c frgfm torchscan
```

### Developer installation

Alternatively, if you wish to use the latest features of the project that haven't made their way to a release yet, you can install the package from source:

```shell
git clone https://github.com/frgfm/torch-scan.git
pip install -e torch-scan/.
```

## Benchmark

Below are the results for classification models supported by `torchvision` for a single image with 3 color channels of size `224x224` (apart from `inception_v3`, which uses `299x299`).

| Model              | Params (M) | FLOPs (G) | MACs (G) | DMAs (G) | RF   |
| ------------------ | ---------- | --------- | -------- | -------- | ---- |
| alexnet            | 61.1       | 1.43      | 0.71     | 0.72     | 195  |
| googlenet          | 6.62       | 3.01      | 1.51     | 1.53     | --   |
| vgg11              | 132.86     | 15.23     | 7.61     | 7.64     | 150  |
| vgg11_bn           | 132.87     | 15.26     | 7.63     | 7.66     | 150  |
| vgg13              | 133.05     | 22.63     | 11.31    | 11.35    | 156  |
| vgg13_bn           | 133.05     | 22.68     | 11.33    | 11.37    | 156  |
| vgg16              | 138.36     | 30.96     | 15.47    | 15.52    | 212  |
| vgg16_bn           | 138.37     | 31.01     | 15.5     | 15.55    | 212  |
| vgg19              | 143.67     | 39.28     | 19.63    | 19.69    | 268  |
| vgg19_bn           | 143.68     | 39.34     | 19.66    | 19.72    | 268  |
| resnet18           | 11.69      | 3.64      | 1.82     | 1.84     | --   |
| resnet34           | 21.8       | 7.34      | 3.67     | 3.7      | --   |
| resnet50           | 25.56      | 8.21      | 4.11     | 4.15     | --   |
| resnet101          | 44.55      | 15.66     | 7.83     | 7.9      | --   |
| resnet152          | 60.19      | 23.1      | 11.56    | 11.65    | --   |
| inception_v3       | 27.16      | 11.45     | 5.73     | 5.76     | --   |
| squeezenet1_0      | 1.25       | 1.64      | 0.82     | 0.83     | --   |
| squeezenet1_1      | 1.24       | 0.7       | 0.35     | 0.36     | --   |
| wide_resnet50_2    | 68.88      | 22.84     | 11.43    | 11.51    | --   |
| wide_resnet101_2   | 126.89     | 45.58     | 22.8     | 22.95    | --   |
| densenet121        | 7.98       | 5.74      | 2.87     | 2.9      | --   |
| densenet161        | 28.68      | 15.59     | 7.79     | 7.86     | --   |
| densenet169        | 14.15      | 6.81      | 3.4      | 3.44     | --   |
| densenet201        | 20.01      | 8.7       | 4.34     | 4.39     | --   |
| resnext50_32x4d    | 25.03      | 8.51      | 4.26     | 4.3      | --   |
| resnext101_32x8d   | 88.79      | 32.93     | 16.48    | 16.61    | --   |
| mobilenet_v2       | 3.5        | 0.63      | 0.31     | 0.32     | --   |
| shufflenet_v2_x0_5 | 1.37       | 0.09      | 0.04     | 0.05     | --   |
| shufflenet_v2_x1_0 | 2.28       | 0.3       | 0.15     | 0.15     | --   |
| shufflenet_v2_x1_5 | 3.5        | 0.6       | 0.3      | 0.31     | --   |
| shufflenet_v2_x2_0 | 7.39       | 1.18      | 0.59     | 0.6      | --   |
| mnasnet0_5         | 2.22       | 0.22      | 0.11     | 0.12     | --   |
| mnasnet0_75        | 3.17       | 0.45      | 0.23     | 0.24     | --   |
| mnasnet1_0         | 4.38       | 0.65      | 0.33     | 0.34     | --   |
| mnasnet1_3         | 6.28       | 1.08      | 0.54     | 0.56     | --   |

The above results were produced using the `scripts/benchmark.py` script.

*Note: receptive field computation is currently only valid for highway nets.*
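
As a quick sanity check of the estimation on a toy highway net, two stacked 3x3 convolutions with stride 1 should report a receptive field of 3 + (3 - 1) = 5 for the deepest layer. A minimal sketch:

```python
from torch import nn

from torchscan import summary

# Two stacked 3x3 convolutions: the second layer sees a 5x5 window of the input
mod = nn.Sequential(nn.Conv2d(3, 8, 3), nn.Conv2d(8, 8, 3))
summary(mod, (3, 32, 32), receptive_field=True)
```
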
## What else

### Documentation

The full package documentation is available [here](https://frgfm.github.io/torch-scan/) for detailed specifications.

### Example script

An example script is provided for you to benchmark torchvision models using the library:

```shell
python scripts/benchmark.py
```

## Credits

This project is developed and maintained by the repo owner, but the implementation was inspired by, or benefited from, the following contributions:

- [Pytorch summary](https://github.com/sksq96/pytorch-summary): an existing PyTorch port of `tf.keras.Model.summary`
- [Torchstat](https://github.com/Swall0w/torchstat): another module inspection tool
- [Flops counter Pytorch](https://github.com/sovrasov/flops-counter.pytorch): operation counter tool
- [THOP](https://github.com/Lyken17/pytorch-OpCounter): PyTorch Op counter
- Number of operations and memory estimation articles by [Matthijs Hollemans](https://machinethink.net/blog/how-fast-is-my-model/) and [Sicara](https://www.sicara.ai/blog/2019-28-10-deep-learning-memory-usage-and-pytorch-optimization-tricks)
- [Pruning Convolutional Neural Networks for Resource Efficient Inference](https://arxiv.org/abs/1611.06440)

## Citation

If you wish to cite this project, feel free to use this [BibTeX](http://www.bibtex.org/) reference:

```bibtex
@misc{torchscan2020,
    title={Torchscan: meaningful module insights},
    author={François-Guillaume Fernandez},
    year={2020},
    month={March},
    publisher = {GitHub},
    howpublished = {\url{https://github.com/frgfm/torch-scan}}
}
```

## Contributing

Any sort of contribution is greatly appreciated! You can find a short guide in [`CONTRIBUTING`](CONTRIBUTING.md) to help grow this project!

## License

Distributed under the Apache 2.0 License. See [`LICENSE`](LICENSE) for more information.

================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

================================================
FILE: docs/README.md
================================================
# Changing the documentation

The documentation of this project is built using `sphinx`. In order to install all the build dependencies, run the following command from the root folder of the repository:

```shell
pip install -e ".[docs]"
```

---

**NOTE**

You are only generating the documentation to inspect it locally. Only the source files are pushed to the remote repository; the documentation will be built automatically by the CI.

---

## Build the documentation

### Latest version

In most cases, you will only be changing the documentation of the latest version (dev version). In this case, you can build the documentation (the HTML files) with the following command:

```shell
sphinx-build docs/source docs/_build -a
```

Then open `docs/_build/index.html` in your web browser to navigate in it.

### Multi-version documentation

In rare cases, you might want to modify the documentation for other versions.
You will then have to build the documentation for the multiple versions of the package, which you can do by running this command from the `docs` folder:

```shell
bash build.sh
```

================================================
FILE: docs/build.sh
================================================
function deploy_doc(){
    if [ ! -z "$1" ]
    then
        git checkout $1
    fi
    COMMIT=$(git rev-parse --short HEAD)
    echo "Creating doc at commit" $COMMIT "and pushing to folder $2"
    pip install -U ..
    if [ ! -z "$2" ]
    then
        if [ "$2" == "latest" ]; then
            echo "Pushing main"
            sphinx-build source build/$2 -a
        elif [ -d build/$2 ]; then
            echo "Directory" $2 "already exists"
        else
            echo "Pushing version" $2
            cp -r _static source/ && cp _conf.py source/conf.py
            sphinx-build source build/$2 -a
        fi
    else
        echo "Pushing stable"
        cp -r _static source/ && cp _conf.py source/conf.py
        sphinx-build source build -a
    fi
    git checkout source/ && git clean -f source/
}

# exit when any command fails
set -e

# You can find the commit for each tag on https://github.com/frgfm/torch-scan/tags
if [ -d build ]; then rm -Rf build; fi
mkdir build
cp -r source/_static .
cp source/conf.py _conf.py
git fetch --all --tags --unshallow

deploy_doc "" latest
deploy_doc "7ac9c839" v0.1.0
deploy_doc "900eb166" v0.1.1
deploy_doc "29fa4ed1" # v0.1.2 Latest stable release
rm -rf _build _static

================================================
FILE: docs/make.bat
================================================
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd

================================================
FILE: docs/source/_static/css/custom.css
================================================
h1 {
    font-size: 200%;
}

/* Github button */
.github-repo {
    display: flex;
    justify-content: center;
}

/* Version control */
.version-button {
    color: gray;
    border: none;
    padding: 5px;
    font-size: 15px;
    cursor: pointer;
}

.version-button:hover, .version-button:focus {
    color: white;
    background-color: gray;
}

.version-dropdown {
    display: none;
    min-width: 160px;
    overflow: auto;
    font-size: 15px;
}

.version-dropdown a {
    color: gray;
    padding: 3px 4px;
    text-decoration: none;
    display: block;
}

.version-dropdown a:hover {
    color: white;
    background-color: gray;
}

.version-show {
    display: block;
}

================================================
FILE: docs/source/_static/js/custom.js
================================================
// Based on https://github.com/huggingface/transformers/blob/master/docs/source/_static/js/custom.js

// These two things need to be updated at each release for the version selector.
// Last stable version
const stableVersion = "v0.1.2"
// Dictionary doc folder to label. The last stable version should have an empty key.
const versionMapping = { "latest": "latest", "": "v0.1.2 (stable)", "v0.1.1": "v0.1.1", "v0.1.0": "v0.1.0", } function addGithubButton() { const div = ` `; document.querySelector(".sidebar-brand").insertAdjacentHTML('afterend', div); } function addVersionControl() { // To grab the version currently in view, we parse the url const parts = location.toString().split('/'); let versionIndex = parts.length - 2; // Index page may not have a last part with filename.html so we need to go up if (parts[parts.length - 1] != "" && ! parts[parts.length - 1].match(/\.html$|^search.html?/)) { versionIndex = parts.length - 1; } const version = parts[versionIndex]; // Menu with all the links, const versionMenu = document.createElement("div"); const htmlLines = []; for (const [key, value] of Object.entries(versionMapping)) { let baseUrlIndex = (version == "torch-scan") ? versionIndex + 1: versionIndex; var urlParts = parts.slice(0, baseUrlIndex); if (key != "") { urlParts = urlParts.concat([key]); } urlParts = urlParts.concat(parts.slice(versionIndex+1)); htmlLines.push(`${value}`); } versionMenu.classList.add("version-dropdown"); versionMenu.innerHTML = htmlLines.join('\n'); // Button for version selection const versionButton = document.createElement("div"); versionButton.classList.add("version-button"); let label = (version == "torch-scan") ? stableVersion : version versionButton.innerText = label.concat(" ▼"); // Toggle the menu when we click on the button versionButton.addEventListener("click", () => { versionMenu.classList.toggle("version-show"); }); // Hide the menu when we click elsewhere window.addEventListener("click", (event) => { if (event.target != versionButton){ versionMenu.classList.remove('version-show'); } }); // Container const div = document.createElement("div"); div.appendChild(versionButton); div.appendChild(versionMenu); div.style.paddingTop = '5px'; div.style.paddingBottom = '5px'; div.style.display = 'block'; div.style.textAlign = 'center'; const scrollDiv = document.querySelector(".sidebar-brand"); scrollDiv.insertBefore(div, scrollDiv.children[1]); } /*! 
* github-buttons v2.2.10 * (c) 2019 なつき * @license BSD-2-Clause */ /** * modified to run programmatically */ function parseGithubButtons (){"use strict";var e=window.document,t=e.location,o=window.encodeURIComponent,r=window.decodeURIComponent,n=window.Math,a=window.HTMLElement,i=window.XMLHttpRequest,l="https://unpkg.com/github-buttons@2.2.10/dist/buttons.html",c=i&&i.prototype&&"withCredentials"in i.prototype,d=c&&a&&a.prototype.attachShadow&&!a.prototype.attachShadow.prototype,s=function(e,t,o){e.addEventListener?e.addEventListener(t,o):e.attachEvent("on"+t,o)},u=function(e,t,o){e.removeEventListener?e.removeEventListener(t,o):e.detachEvent("on"+t,o)},h=function(e,t,o){var r=function(n){return u(e,t,r),o(n)};s(e,t,r)},f=function(e,t,o){var r=function(n){if(t.test(e.readyState))return u(e,"readystatechange",r),o(n)};s(e,"readystatechange",r)},p=function(e){return function(t,o,r){var n=e.createElement(t);if(o)for(var a in o){var i=o[a];null!=i&&(null!=n[a]?n[a]=i:n.setAttribute(a,i))}if(r)for(var l=0,c=r.length;l'},eye:{width:16,height:16,path:''},star:{width:14,height:16,path:''},"repo-forked":{width:10,height:16,path:''},"issue-opened":{width:14,height:16,path:''},"cloud-download":{width:16,height:16,path:''}},w={},x=function(e,t,o){var r=p(e.ownerDocument),n=e.appendChild(r("style",{type:"text/css"}));n.styleSheet?n.styleSheet.cssText=m:n.appendChild(e.ownerDocument.createTextNode(m));var a,l,d=r("a",{className:"btn",href:t.href,target:"_blank",innerHTML:(a=t["data-icon"],l=/^large$/i.test(t["data-size"])?16:14,a=(""+a).toLowerCase().replace(/^octicon-/,""),{}.hasOwnProperty.call(v,a)||(a="mark-github"),'"),"aria-label":t["aria-label"]||void 0},[" ",r("span",{},[t["data-text"]||""])]);/\.github\.com$/.test("."+d.hostname)?/^https?:\/\/((gist\.)?github\.com\/[^\/?#]+\/[^\/?#]+\/archive\/|github\.com\/[^\/?#]+\/[^\/?#]+\/releases\/download\/|codeload\.github\.com\/)/.test(d.href)&&(d.target="_top"):(d.href="#",d.target="_self");var u,h,g,x,y=e.appendChild(r("div",{className:"widget"+(/^large$/i.test(t["data-size"])?" 
lg":"")},[d]));/^(true|1)$/i.test(t["data-show-count"])&&"github.com"===d.hostname&&(u=d.pathname.replace(/^(?!\/)/,"/").match(/^\/([^\/?#]+)(?:\/([^\/?#]+)(?:\/(?:(subscription)|(fork)|(issues)|([^\/?#]+)))?)?(?:[\/?#]|$)/))&&!u[6]?(u[2]?(h="/repos/"+u[1]+"/"+u[2],u[3]?(x="subscribers_count",g="watchers"):u[4]?(x="forks_count",g="network"):u[5]?(x="open_issues_count",g="issues"):(x="stargazers_count",g="stargazers")):(h="/users/"+u[1],g=x="followers"),function(e,t){var o=w[e]||(w[e]=[]);if(!(o.push(t)>1)){var r=b(function(){for(delete w[e];t=o.shift();)t.apply(null,arguments)});if(c){var n=new i;s(n,"abort",r),s(n,"error",r),s(n,"load",function(){var e;try{e=JSON.parse(n.responseText)}catch(e){return void r(e)}r(200!==n.status,e)}),n.open("GET",e),n.send()}else{var a=this||window;a._=function(e){a._=null,r(200!==e.meta.status,e.data)};var l=p(a.document)("script",{async:!0,src:e+(/\?/.test(e)?"&":"?")+"callback=_"}),d=function(){a._&&a._({meta:{}})};s(l,"load",d),s(l,"error",d),l.readyState&&f(l,/de|m/,d),a.document.getElementsByTagName("head")[0].appendChild(l)}}}.call(this,"https://api.github.com"+h,function(e,t){if(!e){var n=t[x];y.appendChild(r("a",{className:"social-count",href:t.html_url+"/"+g,target:"_blank","aria-label":n+" "+x.replace(/_count$/,"").replace("_"," ").slice(0,n<2?-1:void 0)+" on GitHub"},[r("b"),r("i"),r("span",{},[(""+n).replace(/\B(?=(\d{3})+(?!\d))/g,",")])]))}o&&o(y)})):o&&o(y)},y=window.devicePixelRatio||1,C=function(e){return(y>1?n.ceil(n.round(e*y)/y*2)/2:n.ceil(e))||0},F=function(e,t){e.style.width=t[0]+"px",e.style.height=t[1]+"px"},k=function(t,r){if(null!=t&&null!=r)if(t.getAttribute&&(t=function(e){for(var t={href:e.href,title:e.title,"aria-label":e.getAttribute("aria-label")},o=["icon","text","size","show-count"],r=0,n=o.length;r`_ v0.1.1 (2020-08-04) ------------------- Release note: `v0.1.1 `_ v0.1.0 (2020-05-21) ------------------- Release note: `v0.1.0 `_ ================================================ FILE: docs/source/conf.py ================================================ # Copyright (C) 2020-2024, François-Guillaume Fernandez. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. # Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. For a full # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import sys from datetime import datetime from pathlib import Path sys.path.insert(0, Path().cwd().parent.parent) import torchscan # -- Project information ----------------------------------------------------- master_doc = "index" project = "torchscan" copyright = f"2020-{datetime.now().year}, François-Guillaume Fernandez" author = "François-Guillaume Fernandez" # The full version, including alpha/beta/rc tags version = torchscan.__version__ release = torchscan.__version__ + "-git" # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.viewcode", "sphinx.ext.mathjax", "sphinxemoji.sphinxemoji", # cf. https://sphinxemojicodes.readthedocs.io/en/stable/ "sphinx_copybutton", ] napoleon_use_ivar = True # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "friendly" pygments_dark_style = "monokai" highlight_language = "python3" # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = "furo" html_title = "Torchscan" html_logo = "_static/images/logo.png" html_favicon = "_static/images/favicon.ico" language = "en" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { "footer_icons": [ { "name": "GitHub", "url": "https://github.com/frgfm/torch-scan", "html": """ """, "class": "", }, ], "source_repository": "https://github.com/frgfm/torch-scan/", "source_branch": "main", "source_directory": "docs/source/", "sidebar_hide_name": True, } # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] # Add googleanalytics id # ref: https://github.com/orenhecht/googleanalytics/blob/master/sphinxcontrib/googleanalytics.py def add_ga_javascript(app, pagename, templatename, context, doctree): metatags = context.get("metatags", "") metatags += """ """.format(app.config.googleanalytics_id) context["metatags"] = metatags def setup(app): app.add_config_value("googleanalytics_id", "UA-148140560-3", "html") app.add_css_file("css/custom.css") app.add_js_file("js/custom.js") app.connect("html-page-context", add_ga_javascript) ================================================ FILE: docs/source/index.rst ================================================ ************************************** TorchScan: inspect your PyTorch models ************************************** The :mod:`torchscan` package provides tools for analyzing your PyTorch modules and models. Additionally to performance benchmarks, a comprehensive architecture comparison require some insights in the model complexity, its usage of computational and memory resources. This project is meant for: * |:zap:| **exploration**: easily assess the influence of your architecture on resource consumption * |:woman_scientist:| **research**: quickly implement your own ideas to mitigate latency .. toctree:: :maxdepth: 2 :caption: Getting Started :hidden: installing .. toctree:: :maxdepth: 1 :caption: Package Reference :hidden: torchscan modules process utils .. 
toctree::
   :maxdepth: 2
   :caption: Notes
   :hidden:

   changelog


Supported layers
^^^^^^^^^^^^^^^^

Here is the list of supported layers for FLOPs, MACs, DMAs and receptive field computation:

Non-linear activations
""""""""""""""""""""""

* `torch.nn.ReLU `_
* `torch.nn.ELU `_
* `torch.nn.LeakyReLU `_
* `torch.nn.ReLU6 `_
* `torch.nn.Tanh `_
* `torch.nn.Sigmoid `_

Linear layers
"""""""""""""

* `torch.nn.Identity `_
* `torch.nn.Linear `_

Convolutions
""""""""""""

* `torch.nn.Conv1d `_
* `torch.nn.Conv2d `_
* `torch.nn.Conv3d `_
* `torch.nn.ConvTranspose1d `_
* `torch.nn.ConvTranspose2d `_
* `torch.nn.ConvTranspose3d `_

Pooling
"""""""

* `torch.nn.MaxPool1d `_
* `torch.nn.MaxPool2d `_
* `torch.nn.MaxPool3d `_
* `torch.nn.AvgPool1d `_
* `torch.nn.AvgPool2d `_
* `torch.nn.AvgPool3d `_
* `torch.nn.AdaptiveMaxPool1d `_
* `torch.nn.AdaptiveMaxPool2d `_
* `torch.nn.AdaptiveMaxPool3d `_
* `torch.nn.AdaptiveAvgPool1d `_
* `torch.nn.AdaptiveAvgPool2d `_
* `torch.nn.AdaptiveAvgPool3d `_

Normalization
"""""""""""""

* `torch.nn.BatchNorm1d `_
* `torch.nn.BatchNorm2d `_
* `torch.nn.BatchNorm3d `_

Other
"""""

* `torch.nn.Flatten `_
* `torch.nn.Dropout `_

*Please note that the functional API of PyTorch is not supported.*

================================================
FILE: docs/source/installing.rst
================================================
************
Installation
************

This library requires `Python `_ 3.8 or higher.

Via Python Package
==================

Install the last stable release of the package using `pip `_:

.. code:: bash

    pip install torchscan

Via Conda
=========

Install the last stable release of the package using `conda `_:

.. code:: bash

    conda install -c frgfm torchscan

Via Git
=======

Install the library in developer mode:

.. code:: bash

    git clone https://github.com/frgfm/torch-scan.git
    pip install -e torch-scan/.

================================================
FILE: docs/source/modules.rst
================================================
torchscan.modules
=================

The modules subpackage contains tools for inspection of modules.

.. currentmodule:: torchscan.modules

FLOPs
-----

Related to the number of floating-point operations performed during model inference.

.. autofunction:: module_flops

MACs
----

Related to the number of multiply-accumulate operations performed during model inference.

.. autofunction:: module_macs

DMAs
----

Related to the number of direct memory accesses during model inference.

.. autofunction:: module_dmas

Receptive field
---------------

Related to the effective receptive field of a layer.

.. autofunction:: module_rf

================================================
FILE: docs/source/process.rst
================================================
torchscan.process
=================

The process subpackage contains tools regarding active Python processes. The following tools are available:

.. automodule:: torchscan.process
.. currentmodule:: torchscan.process

.. autofunction:: get_process_gpu_ram

================================================
FILE: docs/source/torchscan.rst
================================================
torchscan
=========

.. currentmodule:: torchscan

Crawler
~~~~~~~

.. autofunction:: crawl_module

.. autofunction:: summary

================================================
FILE: docs/source/utils.rst
================================================
torchscan.utils
===============

.. currentmodule:: torchscan.utils

.. autofunction:: format_info

..
autofunction:: aggregate_info ================================================ FILE: pyproject.toml ================================================ [build-system] requires = ["setuptools", "wheel"] build-backend = "setuptools.build_meta" [project] name = "torchscan" description = "Useful information about your Pytorch module" authors = [ {name = "François-Guillaume Fernandez", email = "fg-feedback@protonmail.com"} ] readme = "README.md" requires-python = ">=3.8,<4" license = {file = "LICENSE"} keywords = ["pytorch", "deep learning", "summary", "memory", "ram"] classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Intended Audience :: Science/Research", "License :: OSI Approved :: Apache Software License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Mathematics", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] dynamic = ["version"] dependencies = [ "torch>=2.0.0,<3.0.0", ] [project.optional-dependencies] test = [ "pytest>=7.3.2", "pytest-cov>=3.0.0,<5.0.0", "pytest-pretty>=1.0.0,<2.0.0", ] quality = [ "ruff==0.6.4", "mypy==1.14.0", "pre-commit>=3.0.0,<4.0.0", ] docs = [ "sphinx>=3.0.0,!=3.5.0", "furo>=2022.3.4", "sphinxemoji>=0.1.8", "sphinx-copybutton>=0.3.1", # Indirect deps # cf. https://github.com/readthedocs/readthedocs.org/issues/9038 "Jinja2<3.1", ] dev = [ # test "pytest>=7.3.2", "pytest-cov>=3.0.0,<5.0.0", "pytest-pretty>=1.0.0,<2.0.0", # style "ruff==0.6.4", "mypy==1.14.0", "pre-commit>=3.0.0,<4.0.0", # docs "sphinx>=3.0.0,!=3.5.0", "furo>=2022.3.4", "sphinxemoji>=0.1.8", "sphinx-copybutton>=0.3.1", "Jinja2<3.1", ] [project.urls] documentation = "https://frgfm.github.io/torch-scan" repository = "https://github.com/frgfm/torch-scan" tracker = "https://github.com/frgfm/torch-scan/issues" changelog = "https://frgfm.github.io/torch-scan/latest/changelog.html" [tool.setuptools] zip-safe = true [tool.setuptools.packages.find] exclude = ["docs*", "scripts*", "tests*"] [tool.pytest.ini_options] testpaths = ["torchscan/"] [tool.coverage.run] source = ["torchscan/"] [tool.ruff] line-length = 120 target-version = "py311" preview = true [tool.ruff.lint] select = [ "F", # pyflakes "E", # pycodestyle errors "W", # pycodestyle warnings "I", # isort "N", # pep8-naming "D101", "D103", # pydocstyle missing docstring in public function/class "D201","D202","D207","D208","D214","D215","D300","D301","D417", "D419", # pydocstyle "YTT", # flake8-2020 "ANN", # flake8-annotations "ASYNC", # flake8-async "S", # flake8-bandit "BLE", # flake8-blind-except "B", # flake8-bugbear "A", # flake8-builtins "COM", # flake8-commas "CPY", # flake8-copyright "C4", # flake8-comprehensions "T10", # flake8-debugger "ISC", # flake8-implicit-str-concat "ICN", # flake8-import-conventions "LOG", # flake8-logging "PIE", # flake8-pie "T20", # flake8-print "PYI", # flake8-pyi "PT", # flake8-pytest-style "Q", # flake8-quotes "RET", # flake8-return "SLF", # flake8-self "SIM", # flake8-simplify "ARG", # flake8-unused-arguments "PTH", # flake8-use-pathlib "PERF", # perflint "NPY", # numpy "FAST", # fastapi "FURB", # refurb "RUF", # ruff specific "N", # pep8-naming ] ignore = [ "E501", # line too long, handled by black "B008", # do not perform function calls in argument defaults "B904", # 
raise from "C901", # too complex "F403", # star imports "E731", # lambda assignment "C416", # list comprehension to list() "ANN101", # missing type annotations on self "ANN102", # missing type annotations on cls "ANN002", # missing type annotations on *args "ANN003", # missing type annotations on **kwargs "COM812", # trailing comma missing "N812", # lowercase imported as non-lowercase "ISC001", # implicit string concatenation (handled by format) "ANN401", # Dynamically typed expressions (typing.Any) are disallowed "SLF001", # Private member accessed ] exclude = [".git"] [tool.ruff.lint.flake8-quotes] docstring-quotes = "double" [tool.ruff.lint.isort] known-first-party = ["torchscan", "app"] known-third-party = ["torch", "torchvision"] [tool.ruff.lint.per-file-ignores] "**/__init__.py" = ["I001", "F401", "CPY001"] "scripts/**.py" = ["D", "T201", "N812", "S101", "ANN"] ".github/**.py" = ["D", "T201", "S602", "S101", "ANN"] "docs/**.py" = ["E402", "D103", "ANN", "A001", "ARG001"] "tests/**.py" = ["D101", "D103", "CPY001", "S101", "PT011", "ANN", "SLF001"] "demo/**.py" = ["D103", "ANN"] "setup.py" = ["T201"] "torchscan/process/memory.py" = ["S60"] [tool.ruff.format] quote-style = "double" indent-style = "space" [tool.mypy] python_version = "3.11" files = "torchscan/" show_error_codes = true pretty = true warn_unused_ignores = true warn_redundant_casts = true no_implicit_optional = true disallow_untyped_calls = true check_untyped_defs = true implicit_reexport = false disallow_untyped_defs = true explicit_package_bases = true ================================================ FILE: scripts/benchmark.py ================================================ # Copyright (C) 2020-2024, François-Guillaume Fernandez. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
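
# NOTE: run this script from the repository root with `python scripts/benchmark.py`
# (cf. the "Example script" section of the README). It prints one row per
# torchvision classification model with its parameter count, FLOPs, MACs, DMAs
# and receptive field, which is how the benchmark table in the README was produced.
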
""" Torchvision benchmark """ import torch from torchvision import models from torchscan import crawl_module TORCHVISION_MODELS = [ "alexnet", "googlenet", "vgg11", "vgg11_bn", "vgg13", "vgg13_bn", "vgg16", "vgg16_bn", "vgg19", "vgg19_bn", "resnet18", "resnet34", "resnet50", "resnet101", "resnet152", "inception_v3", "squeezenet1_0", "squeezenet1_1", "wide_resnet50_2", "wide_resnet101_2", "densenet121", "densenet161", "densenet169", "densenet201", "resnext50_32x4d", "resnext101_32x8d", "mobilenet_v2", "shufflenet_v2_x0_5", "shufflenet_v2_x1_0", "shufflenet_v2_x1_5", "shufflenet_v2_x2_0", "mnasnet0_5", "mnasnet0_75", "mnasnet1_0", "mnasnet1_3", ] def main(): device = "cuda" if torch.cuda.is_available() else "cpu" margin = 4 headers = ["Model", "Params (M)", "FLOPs (G)", "MACs (G)", "DMAs (G)", "RF"] max_w = [20, 10, 10, 10, 10, 10] info_str = [(" " * margin).join([f"{col_name:<{col_w}}" for col_name, col_w in zip(headers, max_w, strict=False)])] info_str.append("-" * len(info_str[0])) print("\n".join(info_str)) for name in TORCHVISION_MODELS: model = models.__dict__[name]().eval().to(device) dsize = (3, 224, 224) if "inception" in name: dsize = (3, 299, 299) model_info = crawl_module(model, dsize) tot_params = sum(layer["grad_params"] + layer["nograd_params"] for layer in model_info["layers"]) tot_flops = sum(layer["flops"] for layer in model_info["layers"]) tot_macs = sum(layer["macs"] for layer in model_info["layers"]) tot_dmas = sum(layer["dmas"] for layer in model_info["layers"]) rf = model_info["layers"][0]["rf"] print( f"{name:<{max_w[0]}} | {tot_params / 1e6:<{max_w[1]}.2f} | {tot_flops / 1e9:<{max_w[2]}.2f} | " f"{tot_macs / 1e9:<{max_w[3]}.2f} | {tot_dmas / 1e9:<{max_w[4]}.2f} | {rf:<{max_w[5]}.0f}" ) if __name__ == "__main__": main() ================================================ FILE: setup.py ================================================ # Copyright (C) 2020-2024, François-Guillaume Fernandez. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. import os from pathlib import Path from setuptools import setup PKG_NAME = "torchscan" VERSION = os.getenv("BUILD_VERSION", "0.2.0.dev0") if __name__ == "__main__": print(f"Building wheel {PKG_NAME}-{VERSION}") # Dynamically set the __version__ attribute cwd = Path(__file__).parent.absolute() with cwd.joinpath("torchscan", "version.py").open("w", encoding="utf-8") as f: f.write(f"__version__ = '{VERSION}'\n") setup(name=PKG_NAME, version=VERSION) ================================================ FILE: tests/test_crawler.py ================================================ import io import sys from collections import OrderedDict import pytest import torch.nn as nn from torchscan import crawler def test_apply(): multi_convs = nn.Sequential(nn.Conv2d(16, 32, 3), nn.Conv2d(32, 64, 3)) mod = nn.Sequential(nn.Conv2d(3, 16, 3), multi_convs) # Tag module attributes def tag_name(mod, name): mod.__depth__ = len(name.split(".")) - 1 mod.__name__ = name.rpartition(".")[-1] crawler.apply(mod, tag_name) assert mod[1][1].__depth__ == 2 assert mod[1][1].__name__ == "1" def test_crawl_module(): mod = nn.Conv2d(3, 8, 3) res = crawler.crawl_module(mod, (3, 32, 32)) assert isinstance(res, dict) assert res["overall"]["grad_params"] == 224 assert res["layers"][0]["output_shape"] == (-1, 8, 30, 30) def test_summary(): mod = nn.Conv2d(3, 8, 3) # Redirect stdout with StringIO object captured_output = io.StringIO() sys.stdout = captured_output crawler.summary(mod, (3, 32, 32)) # Reset redirect. 
sys.stdout = sys.__stdout__ assert captured_output.getvalue().split("\n")[7] == "Total params: 224" # Check receptive field captured_output = io.StringIO() sys.stdout = captured_output crawler.summary(mod, (3, 32, 32), receptive_field=True) # Reset redirect. sys.stdout = sys.__stdout__ assert captured_output.getvalue().split("\n")[1].rpartition(" ")[-1] == "Receptive field" assert captured_output.getvalue().split("\n")[3].split()[-1] == "3" # Check effective stats captured_output = io.StringIO() sys.stdout = captured_output crawler.summary(mod, (3, 32, 32), receptive_field=True, effective_rf_stats=True) # Reset redirect. sys.stdout = sys.__stdout__ assert captured_output.getvalue().split("\n")[1].rpartition(" ")[-1] == "Effective padding" assert captured_output.getvalue().split("\n")[3].split()[-1] == "0" # Max depth > model hierarchy with pytest.raises(ValueError): crawler.summary(mod, (3, 32, 32), max_depth=1) mod = nn.Sequential( OrderedDict([ ("features", nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(inplace=True))), ("pool", nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten(1))), ("classifier", nn.Linear(8, 1)), ]) ) captured_output = io.StringIO() sys.stdout = captured_output crawler.summary(mod, (3, 32, 32), max_depth=1) # Reset redirect. sys.stdout = sys.__stdout__ assert captured_output.getvalue().split("\n")[4].startswith("├─features ") ================================================ FILE: tests/test_modules.py ================================================ import pytest import torch from torch import nn from torchscan import modules class MyModule(nn.Module): def __init__(self): super().__init__() def test_module_flops_warning(): with pytest.warns(UserWarning): modules.module_flops(MyModule(), None, None) @pytest.mark.parametrize( ("mod", "input_shape", "output_shape", "expected_val"), [ # Check for unknown module that it returns 0 and throws a warning (MyModule(), (1,), (1,), 0), # Fully-connected (nn.Linear(8, 4), (1, 8), (1, 4), 4 * (2 * 8 - 1) + 4), (nn.Linear(8, 4, bias=False), (1, 8), (1, 4), 4 * (2 * 8 - 1)), (nn.Linear(8, 4), (1, 2, 8), (1, 2, 4), 2 * (4 * (2 * 8 - 1) + 4)), # Activations (nn.Identity(), (1, 8), (1, 8), 0), (nn.Flatten(), (1, 8), (1, 8), 0), (nn.ReLU(), (1, 8), (1, 8), 8), (nn.ELU(), (1, 8), (1, 8), 48), (nn.LeakyReLU(), (1, 8), (1, 8), 32), (nn.ReLU6(), (1, 8), (1, 8), 16), (nn.Tanh(), (1, 8), (1, 8), 48), (nn.Sigmoid(), (1, 8), (1, 8), 32), # BN (nn.BatchNorm1d(8), (1, 8, 4), (1, 8, 4), 144 + 32 + 32 * 3 + 48), # Pooling (nn.MaxPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 3 * 32), (nn.AvgPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 5 * 32), (nn.AdaptiveMaxPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 3 * 32), (nn.AdaptiveMaxPool2d(2), (1, 8, 4, 4), (1, 8, 2, 2), 3 * 32), (nn.AdaptiveAvgPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 5 * 32), (nn.AdaptiveAvgPool2d(2), (1, 8, 4, 4), (1, 8, 2, 2), 5 * 32), # Dropout (nn.Dropout(), (1, 8), (1, 8), 8), (nn.Dropout(p=0), (1, 8), (1, 8), 0), # Conv (nn.Conv2d(3, 8, 3), (1, 3, 32, 32), (1, 8, 30, 30), 388800), (nn.ConvTranspose2d(3, 8, 3), (1, 3, 32, 32), (1, 8, 34, 34), 499408), ], ) def test_module_flops(mod, input_shape, output_shape, expected_val): assert modules.module_flops(mod, (torch.zeros(input_shape),), torch.zeros(output_shape)) == expected_val def test_transformer_flops(): mod = nn.Transformer(d_model=64, nhead=4, num_encoder_layers=3) src = torch.rand((10, 16, 64)) tgt = torch.rand((20, 16, 64)) assert modules.module_flops(mod, (src, tgt), mod(src, tgt)) == 774952841 def test_module_macs_warning(): with 
pytest.warns(UserWarning): modules.module_macs(MyModule(), None, None) @pytest.mark.parametrize( ("mod", "input_shape", "output_shape", "expected_val"), [ # Check for unknown module that it returns 0 and throws a warning (MyModule(), (1,), (1,), 0), # Fully-connected (nn.Linear(8, 4), (1, 8), (1, 4), 8 * 4), (nn.Linear(8, 4), (1, 2, 8), (1, 2, 4), 8 * 4 * 2), # Activations (nn.ReLU(), (1, 8), (1, 8), 0), # BN (nn.BatchNorm1d(8), (1, 8, 4), (1, 8, 4), 64 + 24 + 56 + 32), # Pooling (nn.MaxPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 3 * 32), (nn.AvgPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 5 * 32), (nn.AdaptiveMaxPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 3 * 32), (nn.AdaptiveMaxPool2d(2), (1, 8, 4, 4), (1, 8, 2, 2), 3 * 32), (nn.AdaptiveAvgPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 5 * 32), (nn.AdaptiveAvgPool2d(2), (1, 8, 4, 4), (1, 8, 2, 2), 5 * 32), # Dropout (nn.Dropout(), (1, 8), (1, 8), 0), # Conv (nn.Conv2d(3, 8, 3), (1, 3, 32, 32), (1, 8, 30, 30), 194400), (nn.ConvTranspose2d(3, 8, 3), (1, 3, 32, 32), (1, 8, 34, 34), 249704), ], ) def test_module_macs(mod, input_shape, output_shape, expected_val): assert modules.module_macs(mod, torch.zeros(input_shape), torch.zeros(output_shape)) == expected_val def test_module_dmas_warning(): with pytest.warns(UserWarning): modules.module_dmas(MyModule(), None, None) @pytest.mark.parametrize( ("mod", "input_shape", "output_shape", "expected_val"), [ # Check for unknown module that it returns 0 and throws a warning (MyModule(), (1,), (1,), 0), # Fully-connected (nn.Linear(8, 4), (1, 8), (1, 4), 4 * (8 + 1) + 8 + 4), (nn.Linear(8, 4), (1, 2, 8), (1, 2, 4), 4 * (8 + 1) + 2 * (8 + 4)), # Activations (nn.Identity(), (1, 8), (1, 8), 8), (nn.Flatten(), (1, 8), (1, 8), 16), (nn.ReLU(), (1, 8), (1, 8), 8 * 2), (nn.ReLU(inplace=True), (1, 8), (1, 8), 8), (nn.ELU(), (1, 8), (1, 8), 17), (nn.Tanh(), (1, 8), (1, 8), 24), (nn.Sigmoid(), (1, 8), (1, 8), 16), # BN (nn.BatchNorm1d(8), (1, 8, 4), (1, 8, 4), 32 + 17 + 16 + 1 + 17 + 32), # Pooling (nn.MaxPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 4 * 32 + 32), (nn.MaxPool2d(2), (1, 8, 4, 4), (1, 8, 2, 2), 4 * 32 + 32), (nn.AdaptiveMaxPool2d((2, 2)), (1, 8, 4, 4), (1, 8, 2, 2), 4 * 32 + 32), (nn.AdaptiveMaxPool2d(2), (1, 8, 4, 4), (1, 8, 2, 2), 4 * 32 + 32), # Dropout (nn.Dropout(), (1, 8), (1, 8), 17), # Conv (nn.Conv2d(3, 8, 3), (1, 3, 32, 32), (1, 8, 30, 30), 201824), (nn.ConvTranspose2d(3, 8, 3), (1, 3, 32, 32), (1, 8, 34, 34), 259178), ], ) def test_module_dmas(mod, input_shape, output_shape, expected_val): assert modules.module_dmas(mod, torch.zeros(input_shape), torch.zeros(output_shape)) == expected_val # @torch.no_grad() # def test_module_rf(self): # # Check for unknown module that it returns 0 and throws a warning # self.assertEqual(modules.module_rf(MyModule(), None, None), (1, 1, 0)) # self.assertWarns(UserWarning, modules.module_rf, MyModule(), None, None) # # Common unit tests # # Linear # self.assertEqual(modules.module_rf(nn.Linear(8, 4), torch.zeros((1, 8)), torch.zeros((1, 4))), # (1, 1, 0)) # # Activation # self.assertEqual(modules.module_rf(nn.Identity(), torch.zeros((1, 8)), torch.zeros((1, 8))), (1, 1, 0)) # self.assertEqual(modules.module_rf(nn.Flatten(), torch.zeros((1, 8)), torch.zeros((1, 8))), (1, 1, 0)) # self.assertEqual(modules.module_rf(nn.ReLU(), torch.zeros((1, 8)), torch.zeros((1, 8))), (1, 1, 0)) # self.assertEqual(modules.module_rf(nn.ELU(), torch.zeros((1, 8)), torch.zeros((1, 8))), (1, 1, 0)) # self.assertEqual(modules.module_rf(nn.Sigmoid(), torch.zeros((1, 8)), 
torch.zeros((1, 8))), (1, 1, 0))
#     self.assertEqual(modules.module_rf(nn.Tanh(), torch.zeros((1, 8)), torch.zeros((1, 8))), (1, 1, 0))
#     # Conv
#     input_t = torch.rand((1, 3, 32, 32))
#     mod = nn.Conv2d(3, 8, 3)
#     self.assertEqual(modules.module_rf(mod, input_t, mod(input_t)), (3, 1, 0))
#     # Check for dilation support
#     mod = nn.Conv2d(3, 8, 3, dilation=2)
#     self.assertEqual(modules.module_rf(mod, input_t, mod(input_t)), (5, 1, 0))
#     # ConvTranspose
#     mod = nn.ConvTranspose2d(3, 8, 3)
#     self.assertEqual(modules.module_rf(mod, input_t, mod(input_t)), (-3, 1, 0))
#     # BN
#     self.assertEqual(modules.module_rf(nn.BatchNorm1d(8), torch.zeros((1, 8, 4)), torch.zeros((1, 8, 4))), (1, 1, 0))
#     # Pooling
#     self.assertEqual(modules.module_rf(nn.MaxPool2d((2, 2)), torch.zeros((1, 8, 4, 4)), torch.zeros((1, 8, 2, 2))), (2, 2, 0))
#     self.assertEqual(modules.module_rf(nn.AdaptiveMaxPool2d((2, 2)), torch.zeros((1, 8, 4, 4)), torch.zeros((1, 8, 2, 2))), (2, 2, 0))
#     # Dropout
#     self.assertEqual(modules.module_rf(nn.Dropout(), torch.zeros((1, 8)), torch.zeros((1, 8))), (1, 1, 0))

================================================
FILE: tests/test_process.py
================================================
import os

import torch

from torchscan import process


def test_get_process_gpu_ram():
    # `torch.cuda.is_initialized` is a function and must be called; as a bare
    # attribute it is always truthy, which made the CPU branch dead code.
    if torch.cuda.is_initialized():
        assert process.get_process_gpu_ram(os.getpid()) >= 0
    else:
        assert process.get_process_gpu_ram(os.getpid()) == 0

================================================
FILE: tests/test_utils.py
================================================
import pytest

from torchscan import utils


def test_format_name():
    name = "mymodule"
    assert utils.format_name(name) == name
    assert utils.format_name(name, depth=1) == f"├─{name}"
    assert utils.format_name(name, depth=3) == f"| | └─{name}"


def test_wrap_string():
    example = ".".join(["a" for _ in range(10)])
    max_len = 10
    wrap = "[...]"

    assert utils.wrap_string(example, max_len, mode="end") == example[: max_len - len(wrap)] + wrap
    assert utils.wrap_string(example, max_len, mode="mid") == f"{example[: max_len - 2 - len(wrap)]}{wrap}.a"
    assert utils.wrap_string(example, len(example), mode="end") == example
    with pytest.raises(ValueError):
        _ = utils.wrap_string(example, max_len, mode="test")


@pytest.mark.parametrize(
    ("input_val", "num_val", "unit"),
    [
        (3e14, 300, "T"),
        (3e10, 30, "G"),
        (3e7, 30, "M"),
        (15e3, 15, "k"),
        (500, 500, ""),
    ],
)
def test_unit_scale(input_val, num_val, unit):
    assert utils.unit_scale(input_val) == (num_val, unit)

================================================
FILE: torchscan/__init__.py
================================================
from contextlib import suppress

from torchscan import modules, process, utils
from torchscan.crawler import *

with suppress(ImportError):
    from .version import __version__

================================================
FILE: torchscan/crawler.py
================================================
# Copyright (C) 2020-2024, François-Guillaume Fernandez.
# This program is licensed under the Apache License 2.0.
# See LICENSE or go to for full license details.
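
# This module implements the two public entry points of torchscan:
# - crawl_module: registers forward hooks on every submodule, runs a dry
#   forward pass on a random input tensor under torch.no_grad(), and collects
#   per-layer statistics (params, buffers, FLOPs, MACs, DMAs, receptive field)
#   along with CUDA and framework memory overheads.
# - summary: calls crawl_module, optionally aggregates layers up to max_depth,
#   and prints the result as a table.
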
import os from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union import torch from torch.nn import Module from .modules import module_dmas, module_flops, module_macs, module_rf from .process import get_process_gpu_ram from .utils import aggregate_info, format_info __all__ = ["crawl_module", "summary"] def apply(module: Module, fn: Callable[[Module, str], None], name: Optional[str] = None) -> None: """Modified version of `torch.nn.Module.apply` method Args: module: target module fn: function to apply to each module name: name of the current module """ if name is None: name = module.__class__.__name__.lower() fn(module, name) for n, m in module.named_children(): apply(m, fn, f"{name}.{n}") def crawl_module( module: Module, input_shape: Union[List[Tuple[int, ...]], Tuple[int, ...]], dtype: Optional[Union[torch.dtype, Iterable[torch.dtype]]] = None, ) -> Dict[str, Any]: """Retrieves module information for an expected input tensor shape >>> import torch.nn as nn >>> from torchscan import summary >>> mod = nn.Conv2d(3, 8, 3) >>> module_info = crawl_module(mod, (3, 224, 224)) Args: module: module to inspect input_shape: expected input shapes dtype: data type of each input argument to the module Returns: layer and overhead information """ # Get device and data types from model p = next(module.parameters()) device = p.device cuda_overhead, framework_overhead = 0.0, 0.0 if torch.cuda.is_available(): # Process RAM - allocator RAM cuda_overhead = get_process_gpu_ram(os.getpid()) - (torch.cuda.memory_reserved() / 1024**2) # Allocator RAM - Used RAM framework_overhead = (torch.cuda.memory_reserved() - torch.cuda.memory_allocated()) / 1024**2 # input if not isinstance(input_shape, list): input_shape = [input_shape] if dtype is None: dtype = p.data.dtype if isinstance(dtype, torch.dtype): dtype = [dtype] * len(input_shape) # Tensor arguments input_ts = [ torch.rand(1, *in_shape).to(dtype=_dtype, device=device) for in_shape, _dtype in zip(input_shape, dtype, strict=False) ] pre_fw_handles, post_fw_handles = [], [] pre_hook_tracker: Dict[int, Any] = {} post_hook_tracker: Dict[int, Any] = {} # Hook definition def _hook_info(module: Module, name: str) -> None: def _pre_hook(module: Module, inp: torch.Tensor) -> None: """Pre-forward hook""" # Check that another hook has not been triggered at this forward stage if not pre_hook_tracker[id(module)]["is_used"] and ( pre_hook_tracker[id(module)]["target"] == pre_hook_tracker[id(module)]["current"] ): # Add information # Params grad_params, nograd_params, param_size = 0, 0, 0 num_buffers, buffer_size = 0, 0 is_shared = False if not any(module.children()): # Parameters for p in module.parameters(): if id(p) not in param_ids: if p.requires_grad: grad_params += p.data.numel() else: nograd_params += p.data.numel() param_size += p.data.numel() * p.data.element_size() param_ids.append(id(p)) else: is_shared = True # Buffers for b in module.buffers(): if id(b) not in param_ids: num_buffers += b.numel() buffer_size += b.numel() * b.element_size() param_ids.append(id(b)) else: is_shared = True if call_idxs.get(id(module)) is None: call_idxs[id(module)] = [len(info)] else: call_idxs[id(module)].append(len(info)) info.append({ "name": name.rpartition(".")[-1], "depth": len(name.split(".")) - 1, "type": module.__class__.__name__, "input_shape": (-1, *inp[0][0].shape[1:]), "output_shape": None, "grad_params": grad_params, "nograd_params": nograd_params, "param_size": param_size, "num_buffers": num_buffers, "buffer_size": buffer_size, "flops": 0, "macs": 
0, "dmas": 0, "rf": 1, "s": 1, "p": 0, "is_shared": is_shared, "is_leaf": not any(module.children()), }) # Mark the next hook for execution pre_hook_tracker[id(module)]["target"] += 1 # Current pass already used one of the hooks pre_hook_tracker[id(module)]["is_used"] = True pre_hook_tracker[id(module)]["current"] += 1 # All the hooks have been checked, reset the temporary values if pre_hook_tracker[id(module)]["current"] == len(module._forward_pre_hooks): pre_hook_tracker[id(module)]["current"] = 0 pre_hook_tracker[id(module)]["is_used"] = False def _fwd_hook(module: Module, inputs: Tuple[torch.Tensor, ...], out: torch.Tensor) -> None: """Post-forward hook""" # Check that another hook has not been triggered at this forward stage if not post_hook_tracker[id(module)]["is_used"] and ( post_hook_tracker[id(module)]["target"] == post_hook_tracker[id(module)]["current"] ): # Write information # Retrieve forward index if len(call_idxs[id(module)]) == 1: fw_idx = call_idxs[id(module)][0] else: # The first dictionary with output_shape=None is the correct one for _idx in call_idxs[id(module)]: if info[_idx]["output_shape"] is None: fw_idx = _idx break if any(module.children()): tot_flops, tot_macs, tot_dmas = 0, 0, 0 current_rf, current_stride, current_padding = 1.0, 1.0, 0.0 else: # Compute stats for standalone layers tot_flops = module_flops(module, inputs, out) tot_macs = module_macs(module, inputs[0], out) tot_dmas = module_dmas(module, inputs[0], out) current_rf, current_stride, current_padding = module_rf(module, inputs[0], out) # Update layer information info[fw_idx]["output_shape"] = (-1, *out.shape[1:]) # Add them, since some modules can be used several times info[fw_idx]["flops"] = tot_flops info[fw_idx]["macs"] = tot_macs info[fw_idx]["dmas"] = tot_dmas # Compute receptive field info[fw_idx]["rf"] = current_rf info[fw_idx]["s"] = current_stride info[fw_idx]["p"] = current_padding # Mark the next hook for execution post_hook_tracker[id(module)]["target"] += 1 # Current pass already used one of the hooks post_hook_tracker[id(module)]["is_used"] = True post_hook_tracker[id(module)]["current"] += 1 # All the hooks have been checked, reset the temporary values if post_hook_tracker[id(module)]["current"] == len(module._forward_pre_hooks): post_hook_tracker[id(module)]["current"] = 0 post_hook_tracker[id(module)]["is_used"] = False pre_fw_handles.append(module.register_forward_pre_hook(_pre_hook)) # type: ignore[arg-type] post_fw_handles.append(module.register_forward_hook(_fwd_hook)) # Handle modules that are used multiple times (with several hooks) pre_hook_tracker[id(module)] = {"current": 0, "target": 0, "is_used": False} post_hook_tracker[id(module)] = {"current": 0, "target": 0, "is_used": False} # Hook model info: List[Dict[str, Any]] = [] param_ids: List[int] = [] call_idxs: Dict[int, List[int]] = {} apply(module, _hook_info) # Forward with torch.no_grad(): module(*input_ts) # Removes all hooks using their handles for handle in pre_fw_handles: handle.remove() for handle in post_fw_handles: handle.remove() reserved_ram, diff_ram = 0.0, 0.0 if torch.cuda.is_available(): reserved_ram = torch.cuda.memory_reserved() / 1024**2 diff_ram = (torch.cuda.memory_reserved() - torch.cuda.memory_allocated()) / 1024**2 torch.cuda.synchronize() torch.cuda.empty_cache() grad_params, nograd_params, param_size = 0, 0, 0 num_buffers, buffer_size = 0, 0 for p in module.parameters(): if p.requires_grad: grad_params += p.data.numel() else: nograd_params += p.data.numel() param_size += p.data.numel() * 
p.data.element_size() for b in module.buffers(): num_buffers += b.numel() buffer_size += b.numel() * b.element_size() # Update cumulative receptive field _rf, _s, _p = 1, 1, 0 for fw_idx, _layer in enumerate(info): _rf += _s * (_layer["rf"] - 1) _p += _s * _layer["p"] _s *= _layer["s"] info[fw_idx]["rf"] = _rf info[fw_idx]["s"] = _s info[fw_idx]["p"] = _p return { "overheads": { "cuda": { "pre": cuda_overhead, "fwd": get_process_gpu_ram(os.getpid()) - reserved_ram, }, "framework": {"pre": framework_overhead, "fwd": diff_ram}, }, "layers": info, "overall": { "grad_params": grad_params, "nograd_params": nograd_params, "param_size": param_size, "num_buffers": num_buffers, "buffer_size": buffer_size, }, } def summary( module: Module, input_shape: Tuple[int, ...], wrap_mode: str = "mid", max_depth: Optional[int] = None, receptive_field: bool = False, effective_rf_stats: bool = False, ) -> None: """Print module summary for an expected input tensor shape >>> import torch.nn as nn >>> from torchscan import summary >>> mod = nn.Conv2d(3, 8, 3) >>> summary(mod, (3, 224, 224), receptive_field=True) Args: module: module to inspect input_shape: expected input shapes (don't include batch size) wrap_mode: if a value is too long, where the wrapping should be performed max_depth: maximum depth of layer information receptive_field: whether receptive field estimation should be performed effective_rf_stats: if `receptive_field` is True, displays effective stride and padding """ # Get the summary dict module_info = crawl_module(module, input_shape) # Aggregate until max_depth if isinstance(max_depth, int): module_info = aggregate_info(module_info, max_depth) # Format it and print it print(format_info(module_info, wrap_mode, receptive_field, effective_rf_stats)) # noqa T201 ================================================ FILE: torchscan/modules/__init__.py ================================================ from .flops import * from .macs import * from .memory import * from .receptive import * ================================================ FILE: torchscan/modules/flops.py ================================================ # Copyright (C) 2020-2024, François-Guillaume Fernandez. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
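
# Counting convention in this module: multiplications, additions and
# comparisons each count as a single FLOP. For instance, a dot product over N
# input features costs N multiplications and N - 1 additions, hence the
# 2 * N - 1 terms in flops_linear and flops_convnd below.
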
import warnings
from functools import reduce
from operator import mul
from typing import Tuple

import torch
from torch import Tensor, nn
from torch.nn import Module
from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn.modules.conv import _ConvNd, _ConvTransposeNd
from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd

__all__ = ["module_flops"]


def module_flops(module: Module, inputs: Tuple[Tensor, ...], out: Tensor) -> int:
    """Estimate the number of floating point operations performed by the module

    Args:
        module: PyTorch module
        inputs: input to the module
        out: output of the module

    Returns:
        number of FLOPs
    """
    if isinstance(module, (nn.Identity, nn.Flatten)):
        return 0
    if isinstance(module, nn.Linear):
        return flops_linear(module, inputs)
    if isinstance(module, nn.ReLU):
        return flops_relu(module, inputs)
    if isinstance(module, nn.ELU):
        return flops_elu(module, inputs)
    if isinstance(module, nn.LeakyReLU):
        return flops_leakyrelu(module, inputs)
    if isinstance(module, nn.ReLU6):
        return flops_relu6(module, inputs)
    if isinstance(module, nn.Tanh):
        return flops_tanh(module, inputs)
    if isinstance(module, nn.Sigmoid):
        return flops_sigmoid(module, inputs)
    if isinstance(module, _ConvTransposeNd):
        return flops_convtransposend(module, inputs, out)
    if isinstance(module, _ConvNd):
        return flops_convnd(module, inputs, out)
    if isinstance(module, _BatchNorm):
        return flops_bn(module, inputs)
    if isinstance(module, _MaxPoolNd):
        return flops_maxpool(module, inputs, out)
    if isinstance(module, _AvgPoolNd):
        return flops_avgpool(module, inputs, out)
    if isinstance(module, _AdaptiveMaxPoolNd):
        return flops_adaptive_maxpool(module, inputs, out)
    if isinstance(module, _AdaptiveAvgPoolNd):
        return flops_adaptive_avgpool(module, inputs, out)
    if isinstance(module, nn.Dropout):
        return flops_dropout(module, inputs)
    if isinstance(module, nn.Transformer):
        return flops_transformer(module, inputs)
    warnings.warn(f"Module type not supported: {module.__class__.__name__}", stacklevel=1)
    return 0


def flops_linear(module: nn.Linear, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.Linear`"""
    # batch size * out_chan * in_chan
    num_out_feats = module.out_features * reduce(mul, inputs[0].shape[:-1])
    mm_flops = num_out_feats * (2 * module.in_features - 1)
    bias_flops = num_out_feats if module.bias is not None else 0
    return mm_flops + bias_flops


def flops_sigmoid(_: nn.Sigmoid, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.Sigmoid`"""
    # For each element, mul by -1, exp it, add 1, div
    return inputs[0].numel() * 4


def flops_relu(_: nn.ReLU, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.ReLU`"""
    # Each element is compared to 0
    return inputs[0].numel()


def flops_elu(_: nn.ELU, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.ELU`"""
    # For each element, compare it to 0, exp it, sub 1, mul by alpha, compare it to 0 and sum both
    return inputs[0].numel() * 6


def flops_leakyrelu(_: nn.LeakyReLU, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.LeakyReLU`"""
    # For each element, compare it to 0 (max), compare it to 0 (min), mul by slope and sum both
    return inputs[0].numel() * 4


def flops_relu6(_: nn.ReLU6, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.ReLU6`"""
    # For each element, compare it to 0 (max), then compare it to 6 (min)
    return inputs[0].numel() * 2


def flops_tanh(_: nn.Tanh, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.Tanh`"""
    # For each element, exp it, mul by -1 and exp it, divide the sub by the add
    return inputs[0].numel() * 6


def flops_dropout(module: nn.Dropout, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.Dropout`"""
    if module.p > 0:
        # Sample a random number for each input element
        return inputs[0].numel()
    return 0


def flops_convtransposend(module: _ConvTransposeNd, inputs: Tuple[Tensor, ...], out: Tensor) -> int:
    """FLOPs estimation for `torch.nn.modules.conv._ConvTransposeNd`"""
    # Padding (cf. https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L496-L532)
    # Define min and max sizes
    padding_flops = len(module.kernel_size) * 8
    # Once padding is determined, the operations are almost identical to those of a convolution
    conv_flops = flops_convnd(module, inputs, out)
    return padding_flops + conv_flops


def flops_convnd(module: _ConvNd, inputs: Tuple[Tensor, ...], out: Tensor) -> int:
    """FLOPs estimation for `torch.nn.modules.conv._ConvNd`"""
    # For each position, # mult = kernel size, # adds = kernel size - 1
    window_flops_per_chan = 2 * reduce(mul, module.kernel_size) - 1
    # Connections to input channels is controlled by the group parameter
    effective_in_chan = inputs[0].shape[1] // module.groups
    # N * flops + (N - 1) additions
    window_flops = effective_in_chan * window_flops_per_chan + (effective_in_chan - 1)
    conv_flops = out.numel() * window_flops
    # Each output element gets a bias addition
    bias_flops = out.numel() if module.bias is not None else 0
    return conv_flops + bias_flops


def flops_bn(module: _BatchNorm, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.modules.batchnorm._BatchNorm`"""
    # for each channel, add eps and running_var, sqrt it
    norm_ops = module.num_features * 2
    # For each element, sub running_mean, div by denom
    norm_ops += inputs[0].numel() * 2
    # For each element, mul by gamma, add beta
    scale_ops = inputs[0].numel() * 2 if module.affine else 0
    bn_flops = norm_ops + scale_ops

    # Count tracking stats update ops
    # cf. https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py#L94-L101
    tracking_flops = 0
    if module.track_running_stats and module.training:
        # exponential_average_factor
        if module.momentum is None:
            tracking_flops += 1
        # running_mean: by channel, sum values and div by batch size
        tracking_flops += inputs[0].numel()
        # running_var: by channel, sub mean and square values, sum them, divide by batch size
        tracking_flops += 3 * inputs[0].numel()
        # Update both running stats: rescale previous value (mul by N), add it the new one, then div by (N + 1)
        tracking_flops += 2 * module.num_features * 3

    return bn_flops + tracking_flops


def flops_maxpool(module: _MaxPoolNd, _: Tuple[Tensor, ...], out: Tensor) -> int:
    """FLOPs estimation for `torch.nn.modules.pooling._MaxPoolNd`"""
    k_size = reduce(mul, module.kernel_size) if isinstance(module.kernel_size, tuple) else module.kernel_size
    # for each spatial output element, check max element in kernel scope
    return out.numel() * (k_size - 1)


def flops_avgpool(module: _AvgPoolNd, inputs: Tuple[Tensor, ...], out: Tensor) -> int:
    """FLOPs estimation for `torch.nn.modules.pooling._AvgPoolNd`"""
    k_size = reduce(mul, module.kernel_size) if isinstance(module.kernel_size, tuple) else module.kernel_size
    # for each spatial output element, sum elements in kernel scope and div by kernel size
    return out.numel() * (k_size - 1 + inputs[0].ndim - 2)


def flops_adaptive_maxpool(_: _AdaptiveMaxPoolNd, inputs: Tuple[Tensor, ...], out: Tensor) -> int:
    """FLOPs estimation for `torch.nn.modules.pooling._AdaptiveMaxPoolNd`"""
    # Approximate kernel_size using ratio of spatial shapes between input and output
    kernel_size = tuple(
        i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1
        for i_size, o_size in zip(inputs[0].shape[2:], out.shape[2:], strict=False)
    )
    # for each spatial output element, check max element in kernel scope
    return out.numel() * (reduce(mul, kernel_size) - 1)


def flops_adaptive_avgpool(_: _AdaptiveAvgPoolNd, inputs: Tuple[Tensor, ...], out: Tensor) -> int:
    """FLOPs estimation for `torch.nn.modules.pooling._AdaptiveAvgPoolNd`"""
    # Approximate kernel_size using ratio of spatial shapes between input and output
    kernel_size = tuple(
        i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1
        for i_size, o_size in zip(inputs[0].shape[2:], out.shape[2:], strict=False)
    )
    # for each spatial output element, sum elements in kernel scope and div by kernel size
    return out.numel() * (reduce(mul, kernel_size) - 1 + len(kernel_size))


def flops_layernorm(module: nn.LayerNorm, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.LayerNorm`"""
    # Compute current mean
    norm_ops = reduce(mul, module.normalized_shape) * inputs[0].shape[: -len(module.normalized_shape)].numel()
    # current var (sub the mean, square it, sum them, divide by remaining shape)
    norm_ops += 3 * inputs[0].numel()
    # for each channel, add eps and running_var, sqrt it
    norm_ops += reduce(mul, module.normalized_shape) * 2
    # For each element, sub running_mean, div by denom
    norm_ops += inputs[0].numel() * 2
    # For each element, mul by gamma, add beta
    scale_ops = inputs[0].numel() * 2 if module.elementwise_affine else 0
    return norm_ops + scale_ops


def flops_mha(module: nn.MultiheadAttention, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.MultiheadAttention`"""
    # Input projection
    q, k, _ = inputs[:3]
    batch_size = q.shape[1]
    if module._qkv_same_embed_dim:
        tot_flops = 3 * flops_linear(
            nn.Linear(
                module.in_proj_weight.shape[1], module.in_proj_weight.shape[0], bias=module.in_proj_bias is not None
            ),
            (torch.empty((batch_size, module.in_proj_weight.shape[1])),),
        )
    else:
        tot_flops = flops_linear(
            nn.Linear(
                module.q_proj_weight.shape[1], module.q_proj_weight.shape[0], bias=module.in_proj_bias is not None
            ),
            (torch.empty((batch_size, module.q_proj_weight.shape[1])),),
        )
        tot_flops += flops_linear(
            nn.Linear(module.k_proj_weight.shape[1], module.k_proj_weight.shape[0], bias=module.bias_k is not None),
            (torch.empty((batch_size, module.k_proj_weight.shape[1])),),
        )
        tot_flops += flops_linear(
            nn.Linear(module.v_proj_weight.shape[1], module.v_proj_weight.shape[0], bias=module.bias_v is not None),
            (torch.empty((batch_size, module.v_proj_weight.shape[1])),),
        )

    # Q (L, B, embed_dim) --> (B * num_heads, L, head_dim=embed_dim / num_heads)
    # Scaled dot-product attention (cf. https://github.com/pytorch/pytorch/blob/master/torch/nn/functional.py#L5083)
    # sqrt the embedding dim and div the Q with it
    tot_flops += 1 + batch_size * module.num_heads * module.head_dim * q.shape[0]
    # batched matrix multiply
    tot_flops += batch_size * module.num_heads * (q.shape[0] * k.shape[0]) * (2 * module.head_dim - 1)
    # attention mask
    if inputs[-1] is not None:
        tot_flops += batch_size * module.num_heads * (q.shape[0] * k.shape[0])
    # softmax
    tot_flops += batch_size * module.num_heads * q.shape[0] * (3 * k.shape[0] - 1)
    # dropout
    if module.dropout > 0:
        tot_flops += batch_size * module.num_heads * (q.shape[0] * k.shape[0])
    # batched matrix multiply
    tot_flops += batch_size * module.num_heads * (q.shape[0] * module.head_dim) * (2 * k.shape[0] - 1)
    # Output linear projection
    tot_flops += flops_linear(module.out_proj, (torch.empty((q.shape[0], module.out_proj.in_features)),))

    return tot_flops


def flops_transformer_encoderlayer(module: nn.TransformerEncoderLayer, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.TransformerEncoderLayer`"""
    tot_flops = flops_mha(module.self_attn, (inputs[0],) * 3)
    tot_flops += flops_dropout(module.dropout1, inputs) + inputs[0].numel()
    tot_flops += flops_layernorm(module.norm1, inputs)
    # get linear 1 output size
    tot_flops += flops_linear(module.linear1, inputs)
    tot_flops += module_flops(module.activation, inputs, torch.empty(1))  # type: ignore[arg-type]
    tot_flops += flops_dropout(module.dropout, inputs) + flops_linear(module.linear2, inputs)
    # get linear 2 output size
    tot_flops += flops_dropout(module.dropout2, inputs) + inputs[0].numel()
    tot_flops += flops_layernorm(module.norm2, inputs)

    return tot_flops


def flops_transformer_decoderlayer(module: nn.TransformerDecoderLayer, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.TransformerDecoderLayer`"""
    tot_flops = flops_mha(module.self_attn, (inputs[0],) * 3)
    tot_flops += flops_dropout(module.dropout1, inputs) + inputs[0].numel()
    tot_flops += flops_layernorm(module.norm1, inputs)
    tot_flops += flops_mha(module.multihead_attn, (inputs[0], inputs[1], inputs[1]))
    tot_flops += flops_dropout(module.dropout2, inputs) + inputs[0].numel()
    tot_flops += flops_layernorm(module.norm2, inputs)
    # get linear 1 output size
    tot_flops += flops_linear(module.linear1, inputs)
    tot_flops += module_flops(module.activation, inputs, torch.empty(1))  # type: ignore[arg-type]
    tot_flops += flops_dropout(module.dropout, inputs) + flops_linear(module.linear2, inputs)
    # get linear 2 output size
    tot_flops += flops_dropout(module.dropout3, inputs) + inputs[0].numel()
    tot_flops += flops_layernorm(module.norm3, inputs)

    return tot_flops


def flops_transformer(module: nn.Transformer, inputs: Tuple[Tensor, ...]) -> int:
    """FLOPs estimation for `torch.nn.Transformer`"""
    encoder_flops = len(module.encoder.layers) * flops_transformer_encoderlayer(module.encoder.layers[0], inputs)
    if module.encoder.norm is not None:
        encoder_flops += flops_layernorm(module.encoder.norm, inputs)
    decoder_flops = len(module.decoder.layers) * flops_transformer_decoderlayer(module.decoder.layers[0], inputs)
    if module.decoder.norm is not None:
        decoder_flops += flops_layernorm(module.decoder.norm, inputs)

    return encoder_flops + decoder_flops
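
# A minimal usage sketch for `module_flops` (not part of the repository sources;
# it assumes `torchscan.modules` re-exports the function from `flops.py`):
#
#     import torch
#     from torch import nn
#     from torchscan.modules import module_flops
#
#     mod = nn.Linear(8, 4)
#     x = torch.rand(2, 8)
#     out = mod(x)
#     # matmul: 2 * 4 * (2 * 8 - 1) = 120 FLOPs, bias: 2 * 4 = 8 FLOPs
#     print(module_flops(mod, (x,), out))  # 128
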
================================================
FILE: torchscan/modules/macs.py
================================================
# Copyright (C) 2020-2024, François-Guillaume Fernandez.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

import warnings
from functools import reduce
from operator import mul

from torch import Tensor, nn
from torch.nn import Module
from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn.modules.conv import _ConvNd, _ConvTransposeNd
from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd

__all__ = ["module_macs"]


def module_macs(module: Module, inp: Tensor, out: Tensor) -> int:
    """Estimate the number of multiply-accumulation operations performed by the module

    Args:
        module (torch.nn.Module): PyTorch module
        inp (torch.Tensor): input to the module
        out (torch.Tensor): output of the module

    Returns:
        int: number of MACs
    """
    if isinstance(module, nn.Linear):
        return macs_linear(module, inp, out)
    if isinstance(module, (nn.Identity, nn.ReLU, nn.ELU, nn.LeakyReLU, nn.ReLU6, nn.Tanh, nn.Sigmoid, nn.Flatten)):
        return 0
    if isinstance(module, _ConvTransposeNd):
        return macs_convtransposend(module, inp, out)
    if isinstance(module, _ConvNd):
        return macs_convnd(module, inp, out)
    if isinstance(module, _BatchNorm):
        return macs_bn(module, inp, out)
    if isinstance(module, _MaxPoolNd):
        return macs_maxpool(module, inp, out)
    if isinstance(module, _AvgPoolNd):
        return macs_avgpool(module, inp, out)
    if isinstance(module, _AdaptiveMaxPoolNd):
        return macs_adaptive_maxpool(module, inp, out)
    if isinstance(module, _AdaptiveAvgPoolNd):
        return macs_adaptive_avgpool(module, inp, out)
    if isinstance(module, nn.Dropout):
        return 0
    warnings.warn(f"Module type not supported: {module.__class__.__name__}", stacklevel=1)
    return 0


def macs_linear(module: nn.Linear, _: Tensor, out: Tensor) -> int:
    """MACs estimation for `torch.nn.Linear`"""
    # batch size * out_chan * macs_per_elt (bias already counted in accumulation)
    return module.in_features * reduce(mul, out.shape)


def macs_convtransposend(module: _ConvTransposeNd, inp: Tensor, out: Tensor) -> int:
    """MACs estimation for `torch.nn.modules.conv._ConvTransposeNd`"""
    # Padding (cf. https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L496-L532)
    # Define min and max sizes, then subtract them
    padding_macs = len(module.kernel_size) * 4
    # Rest of the operations are almost identical to a convolution (given the padding)
    conv_macs = macs_convnd(module, inp, out)
    return padding_macs + conv_macs


def macs_convnd(module: _ConvNd, inp: Tensor, out: Tensor) -> int:
    """MACs estimation for `torch.nn.modules.conv._ConvNd`"""
    # For each position, # mult = kernel size, # adds = kernel size - 1
    window_macs_per_chan = reduce(mul, module.kernel_size)
    # Connections to input channels is controlled by the group parameter
    effective_in_chan = inp.shape[1] // module.groups
    # N * mac
    window_mac = effective_in_chan * window_macs_per_chan
    return out.numel() * window_mac  # bias already counted in accumulation


def macs_bn(module: _BatchNorm, inp: Tensor, _: Tensor) -> int:
    """MACs estimation for `torch.nn.modules.batchnorm._BatchNorm`"""
    # sub mean, div by denom
    norm_mac = 1
    # mul by gamma, add beta
    scale_mac = 1 if module.affine else 0
    # Sum everything up
    bn_mac = inp.numel() * (norm_mac + scale_mac)

    # Count tracking stats update ops
    # cf. https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py#L94-L101
    tracking_mac = 0
    b = inp.shape[0]
    num_spatial_elts = inp.shape[2:].numel()
    if module.track_running_stats and module.training:
        # running_mean: by channel, sum value and div by batch size
        tracking_mac += module.num_features * (b * num_spatial_elts - 1)
        # running_var: by channel, sub mean and square values, sum them, divide by batch size
        active_elts = b * num_spatial_elts
        tracking_mac += module.num_features * (2 * active_elts - 1)
        # Update both running stats: rescale previous value (mul by N), add it the new one, then div by (N + 1)
        tracking_mac += 2 * module.num_features * 2

    return bn_mac + tracking_mac


def macs_maxpool(module: _MaxPoolNd, _: Tensor, out: Tensor) -> int:
    """MACs estimation for `torch.nn.modules.pooling._MaxPoolNd`"""
    k_size = reduce(mul, module.kernel_size) if isinstance(module.kernel_size, tuple) else module.kernel_size
    # for each spatial output element, check max element in kernel scope
    return out.numel() * (k_size - 1)


def macs_avgpool(module: _AvgPoolNd, inp: Tensor, out: Tensor) -> int:
    """MACs estimation for `torch.nn.modules.pooling._AvgPoolNd`"""
    k_size = reduce(mul, module.kernel_size) if isinstance(module.kernel_size, tuple) else module.kernel_size
    # for each spatial output element, sum elements in kernel scope and div by kernel size
    return out.numel() * (k_size - 1 + inp.ndim - 2)


def macs_adaptive_maxpool(_: _AdaptiveMaxPoolNd, inp: Tensor, out: Tensor) -> int:
    """MACs estimation for `torch.nn.modules.pooling._AdaptiveMaxPoolNd`"""
    # Approximate kernel_size using ratio of spatial shapes between input and output
    kernel_size = tuple(
        i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1
        for i_size, o_size in zip(inp.shape[2:], out.shape[2:], strict=False)
    )
    # for each spatial output element, check max element in kernel scope
    return out.numel() * (reduce(mul, kernel_size) - 1)


def macs_adaptive_avgpool(_: _AdaptiveAvgPoolNd, inp: Tensor, out: Tensor) -> int:
    """MACs estimation for `torch.nn.modules.pooling._AdaptiveAvgPoolNd`"""
    # Approximate kernel_size using ratio of spatial shapes between input and output
    kernel_size = tuple(
        i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1
        for i_size, o_size in zip(inp.shape[2:], out.shape[2:], strict=False)
    )
    # for each spatial output element, sum elements in kernel scope and div by kernel size
    return out.numel() * (reduce(mul, kernel_size) - 1 + len(kernel_size))
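
# A matching sketch for `module_macs` (again assuming the re-export from
# `torchscan.modules`): each output element of a convolution accumulates
# `in_chan / groups * kernel_size` products, so the count is easy to verify:
#
#     import torch
#     from torch import nn
#     from torchscan.modules import module_macs
#
#     mod = nn.Conv2d(3, 8, 3)
#     x = torch.rand(1, 3, 32, 32)
#     out = mod(x)  # shape (1, 8, 30, 30)
#     # 8 * 30 * 30 output elements, each with 3 * 3 * 3 = 27 MACs
#     print(module_macs(mod, x, out))  # 194_400
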
================================================
FILE: torchscan/modules/memory.py
================================================
# Copyright (C) 2020-2024, François-Guillaume Fernandez.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

import warnings
from functools import reduce
from operator import mul
from typing import Union

from torch import Tensor, nn
from torch.nn import Module
from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn.modules.conv import _ConvNd, _ConvTransposeNd
from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd

__all__ = ["module_dmas"]


def module_dmas(module: Module, inp: Tensor, out: Tensor) -> int:
    """Estimate the number of direct memory accesses by the module.
    The implementation overhead is neglected.

    Args:
        module (torch.nn.Module): PyTorch module
        inp (torch.Tensor): input to the module
        out (torch.Tensor): output of the module

    Returns:
        int: number of DMAs
    """
    if isinstance(module, nn.Identity):
        return dmas_identity(module, inp, out)
    if isinstance(module, nn.Flatten):
        return dmas_flatten(module, inp, out)
    if isinstance(module, nn.Linear):
        return dmas_linear(module, inp, out)
    if isinstance(module, (nn.ReLU, nn.ReLU6)):
        return dmas_relu(module, inp, out)
    if isinstance(module, (nn.ELU, nn.LeakyReLU)):
        return dmas_act_single_param(module, inp, out)
    if isinstance(module, nn.Sigmoid):
        return dmas_sigmoid(module, inp, out)
    if isinstance(module, nn.Tanh):
        return dmas_tanh(module, inp, out)
    if isinstance(module, _ConvTransposeNd):
        return dmas_convtransposend(module, inp, out)
    if isinstance(module, _ConvNd):
        return dmas_convnd(module, inp, out)
    if isinstance(module, _BatchNorm):
        return dmas_bn(module, inp, out)
    if isinstance(module, (_MaxPoolNd, _AvgPoolNd)):
        return dmas_pool(module, inp, out)
    if isinstance(module, (_AdaptiveMaxPoolNd, _AdaptiveAvgPoolNd)):
        return dmas_adaptive_pool(module, inp, out)
    if isinstance(module, nn.Dropout):
        return dmas_dropout(module, inp, out)
    warnings.warn(f"Module type not supported: {module.__class__.__name__}", stacklevel=1)
    return 0


def num_params(module: Module) -> int:
    """Compute the number of parameters

    Args:
        module (torch.nn.Module): PyTorch module

    Returns:
        int: number of parameter elements
    """
    return sum(p.data.numel() for p in module.parameters())


def dmas_identity(_: nn.Identity, inp: Tensor, __: Tensor) -> int:
    """DMAs estimation for `torch.nn.Identity`"""
    return inp.numel()


def dmas_flatten(_: nn.Flatten, inp: Tensor, __: Tensor) -> int:
    """DMAs estimation for `torch.nn.Flatten`"""
    return 2 * inp.numel()


def dmas_linear(module: nn.Linear, inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for `torch.nn.Linear`"""
    input_dma = inp.numel()
    # Access weight and bias
    ops_dma = num_params(module)
    output_dma = out.numel()
    return input_dma + ops_dma + output_dma


def dmas_relu(module: Union[nn.ReLU, nn.ReLU6], inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for `torch.nn.ReLU`"""
    input_dma = inp.numel()
    output_dma = 0 if module.inplace else out.numel()
    return input_dma + output_dma


def dmas_act_single_param(module: Union[nn.ELU, nn.LeakyReLU], inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for activations with a single parameter"""
    input_dma = inp.numel()
    # Access alpha, slope or other
    ops_dma = 1
    output_dma = 0 if module.inplace else out.numel()
    return input_dma + ops_dma + output_dma


def dmas_sigmoid(_: nn.Sigmoid, inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for `torch.nn.Sigmoid`"""
    # Access for both exp
    input_dma = inp.numel()
    output_dma = out.numel()
    return input_dma + output_dma


def dmas_tanh(_: nn.Tanh, inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for `torch.nn.Tanh`"""
    # Access for both exp
    input_dma = inp.numel() * 2
    output_dma = out.numel()
    return input_dma + output_dma


def dmas_dropout(module: nn.Dropout, inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for `torch.nn.Dropout`"""
    input_dma = inp.numel()
    # Access sampling probability
    ops_dma = 1
    output_dma = 0 if module.inplace else out.numel()
    return input_dma + ops_dma + output_dma


def dmas_convtransposend(module: _ConvTransposeNd, inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for `torch.nn.modules.conv._ConvTransposeNd`"""
    # Padding (cf. https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/conv.py#L496-L532)
    # Access stride, padding and kernel_size
    in_padding = len(module.kernel_size) * 4
    out_padding = len(module.kernel_size)
    # The rest is like a classic convolution
    conv_dmas = dmas_convnd(module, inp, out)
    return in_padding + out_padding + conv_dmas


def dmas_convnd(module: _ConvNd, _: Tensor, out: Tensor) -> int:
    """DMAs estimation for `torch.nn.modules.conv._ConvNd`"""
    # Each output element requires K ** 2 memory accesses of each input channel
    input_dma = module.in_channels * reduce(mul, module.kernel_size) * out.numel()
    # Correct with groups
    input_dma //= module.groups
    # Access weight & bias
    ops_dma = num_params(module)
    output_dma = out.numel()
    return input_dma + ops_dma + output_dma


def dmas_bn(module: _BatchNorm, inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for `torch.nn.modules.batchnorm._BatchNorm`"""
    input_dma = inp.numel()
    # Access running_mean, running_var and eps
    ops_dma = module.running_mean.numel() + module.running_var.numel() + 1  # type: ignore[union-attr]
    # Access to weight and bias
    if module.affine:
        ops_dma += module.weight.data.numel() + module.bias.data.numel()
    # Exp avg factor
    if module.momentum:
        ops_dma += 1
    # Update stats
    if module.training and module.track_running_stats:
        # Current mean and std computation only requires access to input, already counted in input_dma
        # Update num of batches and running stats
        ops_dma += 1 + module.running_mean.numel() + module.running_var.numel()  # type: ignore[union-attr]
    output_dma = out.numel()
    return input_dma + ops_dma + output_dma


def dmas_pool(module: Union[_MaxPoolNd, _AvgPoolNd], inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for spatial pooling modules"""
    # Resolve kernel size and stride size (can be stored as a single integer or a tuple)
    if isinstance(module.kernel_size, tuple):
        kernel_size = module.kernel_size
    elif isinstance(module.kernel_size, int):
        kernel_size = (module.kernel_size,) * (inp.ndim - 2)
    # Each output element requires K ** 2 memory accesses
    input_dma = reduce(mul, kernel_size) * out.numel()
    output_dma = out.numel()
    return input_dma + output_dma


def dmas_adaptive_pool(_: Union[_AdaptiveMaxPoolNd, _AdaptiveAvgPoolNd], inp: Tensor, out: Tensor) -> int:
    """DMAs estimation for adaptive spatial pooling modules"""
    # Approximate kernel_size using ratio of spatial shapes between input and output
    kernel_size = tuple(
        i_size // o_size if (i_size % o_size) == 0 else i_size - o_size * (i_size // o_size) + 1
        for i_size, o_size in zip(inp.shape[2:], out.shape[2:], strict=False)
    )
    # Each output element requires K ** 2 memory accesses
    input_dma = reduce(mul, kernel_size) * out.numel()
    output_dma = out.numel()
    return input_dma + output_dma
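
# A short sketch for `module_dmas` (not part of the repository sources; assumes
# the re-export from `torchscan.modules`). In-place modules skip the output write:
#
#     import torch
#     from torch import nn
#     from torchscan.modules import module_dmas
#
#     x = torch.rand(1, 3, 32, 32)  # 3072 elements
#     print(module_dmas(nn.ReLU(), x, x.clamp(min=0)))   # 3072 reads + 3072 writes = 6144
#     print(module_dmas(nn.ReLU(inplace=True), x, x))    # 3072 reads only
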
================================================
FILE: torchscan/modules/receptive.py
================================================
# Copyright (C) 2020-2024, François-Guillaume Fernandez.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

import math
import warnings
from typing import Tuple, Union

from torch import Tensor, nn
from torch.nn import Module
from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn.modules.conv import _ConvNd, _ConvTransposeNd
from torch.nn.modules.pooling import _AdaptiveAvgPoolNd, _AdaptiveMaxPoolNd, _AvgPoolNd, _MaxPoolNd

__all__ = ["module_rf"]


def module_rf(module: Module, inp: Tensor, out: Tensor) -> Tuple[float, float, float]:
    """Estimate the spatial receptive field of the module

    Args:
        module (torch.nn.Module): PyTorch module
        inp (torch.Tensor): input to the module
        out (torch.Tensor): output of the module

    Returns:
        receptive field
        effective stride
        effective padding
    """
    if isinstance(
        module,
        (
            nn.Identity,
            nn.Flatten,
            nn.ReLU,
            nn.ELU,
            nn.LeakyReLU,
            nn.ReLU6,
            nn.Tanh,
            nn.Sigmoid,
            _BatchNorm,
            nn.Dropout,
            nn.Linear,
        ),
    ):
        return 1.0, 1.0, 0.0
    if isinstance(module, _ConvTransposeNd):
        return rf_convtransposend(module, inp, out)
    if isinstance(module, (_ConvNd, _MaxPoolNd, _AvgPoolNd)):
        return rf_aggregnd(module, inp, out)
    if isinstance(module, (_AdaptiveMaxPoolNd, _AdaptiveAvgPoolNd)):
        return rf_adaptive_poolnd(module, inp, out)
    warnings.warn(f"Module type not supported: {module.__class__.__name__}", stacklevel=1)
    return 1.0, 1.0, 0.0


def rf_convtransposend(module: _ConvTransposeNd, _: Tensor, __: Tensor) -> Tuple[float, float, float]:
    k = module.kernel_size[0] if isinstance(module.kernel_size, tuple) else module.kernel_size
    s = module.stride[0] if isinstance(module.stride, tuple) else module.stride
    return -k, 1.0 / s, 0.0


def rf_aggregnd(module: Union[_ConvNd, _MaxPoolNd, _AvgPoolNd], _: Tensor, __: Tensor) -> Tuple[float, float, float]:
    k = module.kernel_size[0] if isinstance(module.kernel_size, tuple) else module.kernel_size
    if hasattr(module, "dilation"):
        d = module.dilation[0] if isinstance(module.dilation, tuple) else module.dilation
        k = d * (k - 1) + 1
    s = module.stride[0] if isinstance(module.stride, tuple) else module.stride
    p = module.padding[0] if isinstance(module.padding, tuple) else module.padding
    return k, s, p  # type: ignore[return-value]


def rf_adaptive_poolnd(
    _: Union[_AdaptiveMaxPoolNd, _AdaptiveAvgPoolNd], inp: Tensor, out: Tensor
) -> Tuple[int, int, float]:
    stride = math.ceil(inp.shape[-1] / out.shape[-1])
    kernel_size = stride
    padding = (inp.shape[-1] - kernel_size * stride) / 2

    return kernel_size, stride, padding
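
# A sketch for `module_rf` (not part of the repository sources; assumes the
# re-export from `torchscan.modules`). Dilation enlarges the effective kernel
# to d * (k - 1) + 1, so a dilated 3x3 kernel spans 5 input positions:
#
#     import torch
#     from torch import nn
#     from torchscan.modules import module_rf
#
#     mod = nn.Conv2d(3, 8, 3, stride=2, padding=1, dilation=2)
#     x = torch.rand(1, 3, 32, 32)
#     out = mod(x)
#     print(module_rf(mod, x, out))  # (5, 2, 1)
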
================================================
FILE: torchscan/process/__init__.py
================================================
from .memory import *


================================================
FILE: torchscan/process/memory.py
================================================
# Copyright (C) 2020-2024, François-Guillaume Fernandez.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

import re
import subprocess  # noqa S404
import warnings

import torch

__all__ = ["get_process_gpu_ram"]


def get_process_gpu_ram(pid: int) -> float:
    """Gets the amount of RAM used by a given process on GPU devices

    Args:
        pid: process ID

    Returns:
        RAM usage in Megabytes
    """
    # PyTorch is not responsible for GPU usage
    if not torch.cuda.is_available():
        warnings.warn("CUDA is unavailable to PyTorch.", stacklevel=1)
        return 0.0

    # Query the running processes on GPUs
    try:
        res = subprocess.run(["nvidia-smi", "-q", "-d", "PIDS"], capture_output=True).stdout.decode()
        # Try to locate the process
        pids = re.findall(r"Process ID\s+:\s([^\D]*)", res)
        for idx, _pid in enumerate(pids):
            if int(_pid) == pid:
                return float(re.findall(r"Used GPU Memory\s+:\s([^\D]*)", res)[idx])
        # Query total memory used by nvidia
        res = subprocess.run(
            ["nvidia-smi", "--query-gpu=memory.used", "--format=csv"], capture_output=True
        ).stdout.decode()
        return float(res.split("\n")[1].split()[0])
    except FileNotFoundError as e:
        warnings.warn(f"raised: {e}. Parsing NVIDIA-SMI failed.", stacklevel=1)

    # Default to overall RAM usage for this process on the GPU
    ram_str = torch.cuda.list_gpu_processes().split("\n")
    # Take the first process running on the GPU
    if ram_str[1].startswith("process"):
        return float(ram_str[1].split()[3])

    # Otherwise assume the process is running exclusively on CPU
    return 0.0
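
# `get_process_gpu_ram` can be probed directly (a sketch, not a repository file;
# `torchscan.process` exposes it via the star import above). On a machine where
# CUDA is unavailable to PyTorch, it warns and falls back to 0.0:
#
#     import os
#     from torchscan.process import get_process_gpu_ram
#
#     print(f"{get_process_gpu_ram(os.getpid()):.2f} Mb")
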
================================================
FILE: torchscan/utils.py
================================================
# Copyright (C) 2020-2024, François-Guillaume Fernandez.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0> for full license details.

from itertools import starmap
from typing import Any, Dict, List, Optional, Tuple


def format_name(name: str, depth: int = 0) -> str:
    """Format a string for nested data printing

    Args:
        name: input string
        depth: depth of the nested information

    Returns:
        formatted string
    """
    if depth == 0:
        return name
    if depth == 1:
        return f"├─{name}"
    return f"{'| ' * (depth - 1)}└─{name}"


def wrap_string(s: str, max_len: int, delimiter: str = ".", wrap: str = "[...]", mode: str = "end") -> str:
    """Wrap a string into a given length

    Args:
        s: input string
        max_len: maximum string length
        delimiter: character used for delimiting information categories
        wrap: wrapping sequence used
        mode: wrapping mode

    Returns:
        wrapped string
    """
    if len(s) <= max_len or mode is None:
        return s

    if mode == "end":
        return s[: max_len - len(wrap)] + wrap
    if mode == "mid":
        final_part = s.rpartition(delimiter)[-1]
        wrapped_end = f"{wrap}.{final_part}"
        return s[: max_len - len(wrapped_end)] + wrapped_end
    raise ValueError("received an unexpected value of argument `mode`")


def unit_scale(val: float) -> Tuple[float, str]:
    """Rescale value using scale units

    Args:
        val: input value

    Returns:
        tuple of rescaled value and unit
    """
    if val // 1e12 > 0:
        return val / 1e12, "T"
    if val // 1e9 > 0:
        return val / 1e9, "G"
    if val // 1e6 > 0:
        return val / 1e6, "M"
    if val // 1e3 > 0:
        return val / 1e3, "k"
    return val, ""


def format_s(f_string: str, min_w: Optional[int] = None, max_w: Optional[int] = None) -> str:
    """Format number strings"""
    if isinstance(min_w, int):
        f_string = f"{f_string:<{min_w}}"
    if isinstance(max_w, int):
        f_string = f"{f_string:.{max_w}}"
    return f_string


def format_line_str(
    layer: Dict[str, Any],
    col_w: Optional[List[int]] = None,
    wrap_mode: str = "mid",
    receptive_field: bool = False,
    effective_rf_stats: bool = False,
) -> List[str]:
    """Wrap all information into multiple lines"""
    if not isinstance(col_w, list):
        col_w = [None] * 7  # type: ignore[list-item]
    max_len = col_w[0] + 3 if isinstance(col_w[0], int) else 100
    line_str = [
        format_s(wrap_string(format_name(layer["name"], layer["depth"]), max_len, mode=wrap_mode), col_w[0], col_w[0]),
        format_s(layer["type"], col_w[1], col_w[1]),
        format_s(str(layer["output_shape"]), col_w[2], col_w[2]),
        format_s(f"{layer['grad_params'] + layer['nograd_params'] + layer['num_buffers']:,}", col_w[3], col_w[3]),
    ]
    if receptive_field:
        line_str.append(format_s(f"{layer['rf']:.0f}", col_w[4], col_w[4]))
    if effective_rf_stats:
        line_str.extend((
            format_s(f"{layer['s']:.0f}", col_w[5], col_w[5]),
            format_s(f"{layer['p']:.0f}", col_w[6], col_w[6]),
        ))
    return line_str


def format_info(
    module_info: Dict[str, Any], wrap_mode: str = "mid", receptive_field: bool = False, effective_rf_stats: bool = False
) -> str:
    """Format module summary for an expected input tensor shape

    Args:
        module_info: dictionary output of `crawl_module`
        wrap_mode: wrapping mode
        receptive_field: whether to display receptive field
        effective_rf_stats: if `receptive_field` is True, displays effective stride and padding

    Returns:
        formatted information
    """
    # Set margin between cols
    margin = 4
    # Dynamic col width
    # Init with headers
    headers = ["Layer", "Type", "Output Shape", "Param #", "Receptive field", "Effective stride", "Effective padding"]
    max_w = [27, 20, 25, 15, 15, 16, 17]
    col_w = [len(s) for s in headers]
    for layer in module_info["layers"]:
        col_w = [
            max(v, len(s))
            for v, s in zip(
                col_w,
                format_line_str(layer, col_w=None, wrap_mode=wrap_mode, receptive_field=True, effective_rf_stats=True),
                strict=False,
            )
        ]
    # Truncate columns that are too long
    col_w = list(starmap(min, zip(col_w, max_w, strict=False)))

    if not receptive_field:
        col_w = col_w[:4]
        headers = headers[:4]
    elif not effective_rf_stats:
        col_w = col_w[:5]
        headers = headers[:5]

    # Define separating lines
    line_length = sum(col_w) + (len(col_w) - 1) * margin
    thin_line = "_" * line_length
    thick_line = "=" * line_length
    dot_line = "-" * line_length

    margin_str = " " * margin

    # Header
    info_str = [
        thin_line,
        margin_str.join([f"{col_name:<{col_w}}" for col_name, col_w in zip(headers, col_w, strict=False)]),
        thick_line,
    ]

    # Layers
    for layer in module_info["layers"]:
        line_str = format_line_str(layer, col_w, wrap_mode, receptive_field, effective_rf_stats)
        info_str.append((" " * margin).join(line_str))

    # Parameter information
    num_params = module_info["overall"]["grad_params"] + module_info["overall"]["nograd_params"]
    info_str.extend((
        thick_line,
        f"Trainable params: {module_info['overall']['grad_params']:,}",
        f"Non-trainable params: {module_info['overall']['nograd_params']:,}",
        f"Total params: {num_params:,}",
    ))

    # Static RAM usage
    info_str.append(dot_line)
    # Convert to Megabytes
    param_size = (module_info["overall"]["param_size"] + module_info["overall"]["buffer_size"]) / 1024**2
    overhead = module_info["overheads"]["framework"]["fwd"] + module_info["overheads"]["cuda"]["fwd"]
    info_str.extend((
        f"Model size (params + buffers): {param_size:.2f} Mb",
        f"Framework & CUDA overhead: {overhead:.2f} Mb",
        f"Total RAM usage: {param_size + overhead:.2f} Mb",
    ))

    # FLOPS information
    info_str.append(dot_line)
    flops, flops_units = unit_scale(sum(layer["flops"] for layer in module_info["layers"]))
    macs, macs_units = unit_scale(sum(layer["macs"] for layer in module_info["layers"]))
    dmas, dmas_units = unit_scale(sum(layer["dmas"] for layer in module_info["layers"]))
    info_str.extend((
        f"Floating Point Operations on forward: {flops:.2f} {flops_units}FLOPs",
        f"Multiply-Accumulations on forward: {macs:.2f} {macs_units}MACs",
        f"Direct memory accesses on forward: {dmas:.2f} {dmas_units}DMAs",
        thin_line,
    ))

    return "\n".join(info_str)


def aggregate_info(info: Dict[str, Any], max_depth: int) -> Dict[str, Any]:
    """Aggregate module information to a maximum depth

    Args:
        info: dictionary output of `crawl_module`
        max_depth: depth at which parent node aggregates children information

    Returns:
        edited dictionary information
    """
    if not any(layer["depth"] == max_depth for layer in info["layers"]):
        raise ValueError("The `max_depth` argument cannot be higher than module depth.")

    for fw_idx, layer in enumerate(info["layers"]):
        # Need to aggregate information
        if not layer["is_leaf"] and layer["depth"] == max_depth:
            grad_p, nograd_p, p_size, num_buffers, b_size = 0, 0, 0, 0, 0
            flops, macs, dmas = 0, 0, 0
            for _layer in info["layers"][fw_idx + 1 :]:
                # Children have superior depth and were hooked after parent
                if _layer["depth"] <= max_depth:
                    break
                # Aggregate all information (flops, macc, ram)
                flops += _layer["flops"]
                macs += _layer["macs"]
                dmas += _layer["dmas"]
                grad_p += _layer["grad_params"]
                nograd_p += _layer["nograd_params"]
                p_size += _layer["param_size"]
                num_buffers += _layer["num_buffers"]
                b_size += _layer["buffer_size"]
                # Take last child effective RF
                _rf, _s, _p = _layer["rf"], _layer["s"], _layer["p"]

            # Update info
            info["layers"][fw_idx]["flops"] = flops
            info["layers"][fw_idx]["macs"] = macs
            info["layers"][fw_idx]["dmas"] = dmas
            info["layers"][fw_idx]["rf"] = _rf
            info["layers"][fw_idx]["s"] = _s
            info["layers"][fw_idx]["p"] = _p
            info["layers"][fw_idx]["grad_params"] = grad_p
            info["layers"][fw_idx]["nograd_params"] = nograd_p
            info["layers"][fw_idx]["param_size"] = p_size
            info["layers"][fw_idx]["num_buffers"] = num_buffers
            info["layers"][fw_idx]["buffer_size"] = b_size

    # Filter out further depth information
    info["layers"] = [layer for layer in info["layers"] if layer["depth"] <= max_depth]

    return info
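
# The pure helpers in `torchscan/utils.py` are easy to check in isolation
# (a sketch, not a repository file; outputs verified against the code above):
#
#     from torchscan.utils import format_name, unit_scale, wrap_string
#
#     print(format_name("conv1", depth=1))                   # ├─conv1
#     print(wrap_string("features.0.conv", 10, mode="end"))  # featu[...]
#     print(wrap_string("features.0.conv", 10, mode="mid"))  # [...].conv
#     print(unit_scale(3.5e9))                               # (3.5, 'G')
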