Full Code of pytorch/tensorpipe for AI

main b4b77d1006e7 cached
292 files
1.2 MB
314.9k tokens
883 symbols
1 requests
Download .txt
Showing preview only (1,290K chars total). Download the full file or copy to clipboard to get everything.
Repository: pytorch/tensorpipe
Branch: main
Commit: b4b77d1006e7
Files: 292
Total size: 1.2 MB

Directory structure:
gitextract_wzzfsv6c/

├── .circleci/
│   ├── Dockerfile.cuda10.1
│   ├── Dockerfile.cuda10.2
│   ├── Dockerfile.cuda11.0
│   ├── Dockerfile.cuda11.1
│   ├── Dockerfile.cuda9.2
│   └── config.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── README.md
├── cmake/
│   ├── FindPackageHandleStandardArgs.cmake
│   ├── FindPackageMessage.cmake
│   ├── Finduv.cmake
│   ├── MiscCheck.cmake
│   ├── Options.cmake
│   └── Sanitize.cmake
├── docs/
│   ├── cuda_gotchas.md
│   ├── development.md
│   ├── linux_support.md
│   ├── shm.md
│   └── thread_model.md
├── setup.py
├── tensorpipe/
│   ├── .clang-format
│   ├── .clang-tidy
│   ├── CMakeLists.txt
│   ├── benchmark/
│   │   ├── CMakeLists.txt
│   │   ├── benchmark_pipe.cc
│   │   ├── benchmark_transport.cc
│   │   ├── channel_registry.cc
│   │   ├── channel_registry.h
│   │   ├── measurements.h
│   │   ├── options.cc
│   │   ├── options.h
│   │   ├── registry.h
│   │   ├── transport_registry.cc
│   │   └── transport_registry.h
│   ├── channel/
│   │   ├── basic/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── channel.h
│   │   ├── channel_boilerplate.h
│   │   ├── channel_impl_boilerplate.h
│   │   ├── cma/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── context.h
│   │   ├── context_boilerplate.h
│   │   ├── context_impl_boilerplate.h
│   │   ├── cuda_basic/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── constants.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── cuda_gdr/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── constants.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── error.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── cuda_ipc/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── constants.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── cuda_xth/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── error.cc
│   │   ├── error.h
│   │   ├── helpers.cc
│   │   ├── helpers.h
│   │   ├── mpt/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   ├── factory.h
│   │   │   └── nop_types.h
│   │   └── xth/
│   │       ├── channel_impl.cc
│   │       ├── channel_impl.h
│   │       ├── context_impl.cc
│   │       ├── context_impl.h
│   │       ├── factory.cc
│   │       └── factory.h
│   ├── common/
│   │   ├── address.cc
│   │   ├── address.h
│   │   ├── allocator.cc
│   │   ├── allocator.h
│   │   ├── buffer.h
│   │   ├── busy_polling_loop.h
│   │   ├── callback.h
│   │   ├── cpu_buffer.h
│   │   ├── cuda.h
│   │   ├── cuda_buffer.cc
│   │   ├── cuda_buffer.h
│   │   ├── cuda_lib.h
│   │   ├── cuda_loop.cc
│   │   ├── cuda_loop.h
│   │   ├── deferred_executor.h
│   │   ├── defs.h
│   │   ├── device.h
│   │   ├── dl.h
│   │   ├── epoll_loop.cc
│   │   ├── epoll_loop.h
│   │   ├── error.cc
│   │   ├── error.h
│   │   ├── error_macros.h
│   │   ├── fd.cc
│   │   ├── fd.h
│   │   ├── ibv.cc
│   │   ├── ibv.h
│   │   ├── ibv_lib.h
│   │   ├── memory.h
│   │   ├── nop.h
│   │   ├── nvml_lib.h
│   │   ├── optional.h
│   │   ├── queue.h
│   │   ├── ringbuffer.h
│   │   ├── ringbuffer_read_write_ops.h
│   │   ├── ringbuffer_role.h
│   │   ├── shm_ringbuffer.h
│   │   ├── shm_segment.cc
│   │   ├── shm_segment.h
│   │   ├── socket.cc
│   │   ├── socket.h
│   │   ├── state_machine.h
│   │   ├── stream_read_write_ops.h
│   │   ├── strings.h
│   │   ├── system.cc
│   │   └── system.h
│   ├── config.h.in
│   ├── config_cuda.h.in
│   ├── core/
│   │   ├── context.cc
│   │   ├── context.h
│   │   ├── context_impl.cc
│   │   ├── context_impl.h
│   │   ├── error.cc
│   │   ├── error.h
│   │   ├── listener.cc
│   │   ├── listener.h
│   │   ├── listener_impl.cc
│   │   ├── listener_impl.h
│   │   ├── message.h
│   │   ├── nop_types.h
│   │   ├── pipe.cc
│   │   ├── pipe.h
│   │   ├── pipe_impl.cc
│   │   └── pipe_impl.h
│   ├── misc/
│   │   ├── CMakeLists.txt
│   │   └── dump_state_machine.cc
│   ├── python/
│   │   ├── CMakeLists.txt
│   │   └── tensorpipe.cc
│   ├── tensorpipe.h
│   ├── tensorpipe_cuda.h
│   ├── test/
│   │   ├── CMakeLists.txt
│   │   ├── channel/
│   │   │   ├── basic/
│   │   │   │   └── basic_test.cc
│   │   │   ├── channel_test.cc
│   │   │   ├── channel_test.h
│   │   │   ├── channel_test_cpu.cc
│   │   │   ├── channel_test_cpu.h
│   │   │   ├── channel_test_cuda.cc
│   │   │   ├── channel_test_cuda.h
│   │   │   ├── channel_test_cuda_multi_gpu.cc
│   │   │   ├── channel_test_cuda_xdtt.cc
│   │   │   ├── cma/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── cma_test.cc
│   │   │   │   ├── docker_tests.sh
│   │   │   │   ├── probe.cc
│   │   │   │   └── probe_report_checker.py
│   │   │   ├── cuda_basic/
│   │   │   │   └── cuda_basic_test.cc
│   │   │   ├── cuda_gdr/
│   │   │   │   └── cuda_gdr_test.cc
│   │   │   ├── cuda_helpers.h
│   │   │   ├── cuda_ipc/
│   │   │   │   └── cuda_ipc_test.cc
│   │   │   ├── cuda_xth/
│   │   │   │   └── cuda_xth_test.cc
│   │   │   ├── kernel.cu
│   │   │   ├── kernel.cuh
│   │   │   ├── mpt/
│   │   │   │   └── mpt_test.cc
│   │   │   └── xth/
│   │   │       └── xth_test.cc
│   │   ├── common/
│   │   │   ├── cuda_test.cc
│   │   │   ├── defs_test.cc
│   │   │   ├── epoll_loop_test.cc
│   │   │   ├── ringbuffer_test.cc
│   │   │   ├── shm_ringbuffer_test.cc
│   │   │   ├── shm_segment_test.cc
│   │   │   └── system_test.cc
│   │   ├── core/
│   │   │   ├── context_test.cc
│   │   │   ├── listener_test.cc
│   │   │   ├── pipe_cuda_test.cc
│   │   │   ├── pipe_test.cc
│   │   │   └── pipe_test.h
│   │   ├── peer_group.h
│   │   ├── python/
│   │   │   └── tensorpipe.py
│   │   ├── test.cc
│   │   ├── test_environment.cc
│   │   ├── test_environment.h
│   │   └── transport/
│   │       ├── connection_test.cc
│   │       ├── context_test.cc
│   │       ├── ibv/
│   │       │   ├── connection_test.cc
│   │       │   ├── context_test.cc
│   │       │   ├── ibv_test.cc
│   │       │   ├── ibv_test.h
│   │       │   └── sockaddr_test.cc
│   │       ├── listener_test.cc
│   │       ├── shm/
│   │       │   ├── connection_test.cc
│   │       │   ├── listener_test.cc
│   │       │   ├── reactor_test.cc
│   │       │   ├── shm_test.cc
│   │       │   ├── shm_test.h
│   │       │   └── sockaddr_test.cc
│   │       ├── transport_test.h
│   │       └── uv/
│   │           ├── connection_test.cc
│   │           ├── context_test.cc
│   │           ├── loop_test.cc
│   │           ├── sockaddr_test.cc
│   │           ├── uv_test.cc
│   │           └── uv_test.h
│   └── transport/
│       ├── connection.h
│       ├── connection_boilerplate.h
│       ├── connection_impl_boilerplate.h
│       ├── context.h
│       ├── context_boilerplate.h
│       ├── context_impl_boilerplate.h
│       ├── error.cc
│       ├── error.h
│       ├── ibv/
│       │   ├── connection_impl.cc
│       │   ├── connection_impl.h
│       │   ├── constants.h
│       │   ├── context_impl.cc
│       │   ├── context_impl.h
│       │   ├── error.cc
│       │   ├── error.h
│       │   ├── factory.cc
│       │   ├── factory.h
│       │   ├── listener_impl.cc
│       │   ├── listener_impl.h
│       │   ├── reactor.cc
│       │   ├── reactor.h
│       │   ├── sockaddr.cc
│       │   ├── sockaddr.h
│       │   ├── utility.cc
│       │   └── utility.h
│       ├── listener.h
│       ├── listener_boilerplate.h
│       ├── listener_impl_boilerplate.h
│       ├── shm/
│       │   ├── connection_impl.cc
│       │   ├── connection_impl.h
│       │   ├── context_impl.cc
│       │   ├── context_impl.h
│       │   ├── factory.cc
│       │   ├── factory.h
│       │   ├── listener_impl.cc
│       │   ├── listener_impl.h
│       │   ├── reactor.cc
│       │   ├── reactor.h
│       │   ├── sockaddr.cc
│       │   └── sockaddr.h
│       └── uv/
│           ├── connection_impl.cc
│           ├── connection_impl.h
│           ├── context_impl.cc
│           ├── context_impl.h
│           ├── error.cc
│           ├── error.h
│           ├── factory.cc
│           ├── factory.h
│           ├── listener_impl.cc
│           ├── listener_impl.h
│           ├── loop.cc
│           ├── loop.h
│           ├── sockaddr.cc
│           ├── sockaddr.h
│           ├── utility.cc
│           ├── utility.h
│           └── uv.h
└── third_party/
    └── README.md

================================================
FILE CONTENTS
================================================

================================================
FILE: .circleci/Dockerfile.cuda10.1
================================================
FROM nvidia/cuda:10.1-devel-ubuntu18.04

# Install APT packages.
RUN apt-get update && \
        apt-get install -y build-essential cmake

COPY . /tensorpipe

WORKDIR /tensorpipe


================================================
FILE: .circleci/Dockerfile.cuda10.2
================================================
FROM nvidia/cuda:10.2-devel-ubuntu18.04

# Install APT packages.
RUN apt-get update && \
        apt-get install -y build-essential cmake

COPY . /tensorpipe

WORKDIR /tensorpipe


================================================
FILE: .circleci/Dockerfile.cuda11.0
================================================
FROM nvidia/cuda:11.0-devel-ubuntu18.04

# Install APT packages.
RUN apt-get update && \
        apt-get install -y build-essential cmake

COPY . /tensorpipe

WORKDIR /tensorpipe


================================================
FILE: .circleci/Dockerfile.cuda11.1
================================================
FROM nvidia/cuda:11.1-devel-ubuntu18.04

# Install APT packages.
RUN apt-get update && \
        apt-get install -y build-essential cmake

COPY . /tensorpipe

WORKDIR /tensorpipe


================================================
FILE: .circleci/Dockerfile.cuda9.2
================================================
FROM nvidia/cuda:9.2-devel-ubuntu18.04

# Install APT packages.
RUN apt-get update && \
        apt-get install -y build-essential cmake

COPY . /tensorpipe

WORKDIR /tensorpipe


================================================
FILE: .circleci/config.yml
================================================
version: 2.1

jobs:
  build:
    parameters:
      docker_image:
        type: string
        default: ""
      apt_get:
        type: string
        default: ""
      c_compiler:
        type: string
        default: ""
      cxx_compiler:
        type: string
        default: ""
      cmake_args:
        type: string
        default: ""
      nproc:
        type: integer
        default: 20
    docker:
      - image: << parameters.docker_image >>
    steps:
      - checkout
      - run:
          name: Install apt packages
          command: |
            apt-get update
            apt-get install -y git-core build-essential cmake << parameters.apt_get >>
      - run:
          name: Initialize submodules
          command: |
            git submodule init
            git submodule update
      - run:
          name: Build
          command: |
            mkdir build
            cd build
            cmake ../ \
              -DCMAKE_C_FLAGS="-Werror -Wno-deprecated-declarations" \
              -DCMAKE_CXX_FLAGS="-Werror -Wno-deprecated-declarations" \
              -DCMAKE_C_COMPILER=<< parameters.c_compiler >> \
              -DCMAKE_CXX_COMPILER=<< parameters.cxx_compiler >> \
              -DTP_ENABLE_CMA=OFF \
              -DTP_ENABLE_CUDA_IPC=OFF \
              -DTP_ENABLE_IBV=OFF \
              -DTP_BUILD_TESTING=ON \
              << parameters.cmake_args >>
            make -j<<parameters.nproc>>
      - run:
          name: Test
          command: |
            cd build
            ./tensorpipe/test/tensorpipe_test
      - run:
          name: Install
          command: |
            cd build
            make install
  build_gpu:
    parameters:
      cuda_version:
        type: string
      exclude_tests:
        type: string
        default: ""
    machine:
      resource_class: gpu.nvidia.small.multi
      image: ubuntu-1604-cuda-10.1:201909-23
      docker_layer_caching: true
    steps:
      - checkout
      - run:
          name: Initialize submodules
          command: |
            git submodule init
            git submodule update
      - run:
          name: Build/test
          command: |
              docker build -t tensorpipe -f .circleci/Dockerfile.cuda<< parameters.cuda_version >> .
              docker run --gpus all --pid=host tensorpipe sh -c "
                mkdir build && cd build &&
                cmake ../ \
                  -DCMAKE_C_FLAGS=\"-Werror -Wno-deprecated-declarations\" \
                  -DCMAKE_CXX_FLAGS=\"-Werror -Wno-deprecated-declarations\" \
                  -DCUDA_NVCC_FLAGS=\"-gencode arch=compute_61,code=sm_61\" \
                  -DTP_ENABLE_SHM=OFF \
                  -DTP_ENABLE_CMA=OFF \
                  -DTP_USE_CUDA=ON \
                  -DTP_ENABLE_CUDA_IPC=ON \
                  -DTP_ENABLE_IBV=OFF \
                  -DTP_BUILD_TESTING=ON &&
                make -j20 &&
                ./tensorpipe/test/tensorpipe_test --gtest_filter='-<< parameters.exclude_tests >>' &&
                make install"
  bare_metal:
    parameters:
      image:
        type: string
        default: ""
      apt_get:
        type: string
        default: ""
      c_compiler:
        type: string
        default: ""
      cxx_compiler:
        type: string
        default: ""
      cmake_args:
        type: string
        default: ""
      nproc:
        type: integer
        default: 20
    machine:
      image: << parameters.image >>
    steps:
      - checkout
      - run:
          name: Install apt packages
          command: |
            sudo apt-get update
            sudo apt-get install -y git-core build-essential cmake libibverbs1 rdma-core linux-modules-extra-$(uname -r) << parameters.apt_get >>
      - run:
          name: Initialize submodules
          command: |
            git submodule init
            git submodule update
      - run:
          name: Build
          command: |
            mkdir build
            cd build
            cmake ../ \
              -DCMAKE_C_FLAGS="-Werror -Wno-deprecated-declarations" \
              -DCMAKE_CXX_FLAGS="-Werror -Wno-deprecated-declarations" \
              -DCMAKE_C_COMPILER=<< parameters.c_compiler >> \
              -DCMAKE_CXX_COMPILER=<< parameters.cxx_compiler >> \
              -DTP_ENABLE_CUDA_IPC=OFF \
              -DTP_ENABLE_IBV=ON \
              -DTP_BUILD_TESTING=ON \
              << parameters.cmake_args >>
            make -j<<parameters.nproc>>
      - run:
          name: Configure Soft-RoCE (RXE) InfiniBand interface
          command: |
            # Find the name of the first non-loopback IP interface
            INTERFACE_NAME=$(ip link | grep '^2: ' | sed -re 's/2: ([a-z0-9]+): .*/\1/')
            sudo rdma link add rxe0 type rxe netdev $INTERFACE_NAME
      - run:
          name: Test
          command: |
            cd build
            ./tensorpipe/test/tensorpipe_test
      - run:
          name: Test CMA channel autodetection with Docker
          command: |
            bash -eo pipefail tensorpipe/test/channel/cma/docker_tests.sh
      - run:
          name: Install
          command: |
            cd build
            sudo make install
  build_osx:
    macos:
      xcode: 12.4.0
    steps:
      - checkout
      - run:
          name: Install homebrew packages
          command: |
            brew install cmake
      - run:
          name: Initialize submodules
          command: |
            git submodule init
            git submodule update
      - run:
          name: Build
          command: |
            mkdir build
            cd build
            cmake ../ \
              -DCMAKE_C_FLAGS="-Werror -Wno-deprecated-declarations" \
              -DCMAKE_CXX_FLAGS="-Werror -Wno-deprecated-declarations" \
              -DTP_BUILD_TESTING=ON
            make -j
      - run:
          name: Test
          command: |
            cd build
            ./tensorpipe/test/tensorpipe_test
      - run:
          name: Install
          command: |
            cd build
            make install
  python:
    parameters:
      docker_image:
        type: string
        default: ""
      apt_get:
        type: string
        default: ""
    docker:
      - image: << parameters.docker_image >>
    steps:
      - checkout
      - run:
          name: Install apt packages
          command: |
            apt-get update
            apt-get install -y git-core build-essential cmake python3-dev python3-venv << parameters.apt_get >>
      - run:
          name: Initialize submodules
          command: |
            git submodule init
            git submodule update
      - run:
          name: Build
          command: |
            python3 -m venv venv
            source venv/bin/activate
            TP_ENABLE_CMA=OFF TP_ENABLE_CUDA_IPC=OFF TP_ENABLE_IBV=OFF python3 setup.py install
      - run:
          name: Test
          command: |
            source venv/bin/activate
            python3 tensorpipe/test/python/tensorpipe.py
  format:
    docker:
      - image: ubuntu:18.04
    steps:
      - checkout
      - run:
          name: Install clang-format
          command: |
            apt-get update
            apt-get install -y git-core clang-format-10
      - run:
          name: Verify clang-format
          command: |
             git ls-files | grep -E  '\.(cc|h)$' | xargs clang-format-10 -i
             if git diff --quiet; then
               echo "Formatting OK!"
             else
               echo "Formatting not OK!"
               echo "------------------"
               git --no-pager diff --color
               exit 1
             fi

workflows:
  build:
    jobs:
      - build:
          name: gcc5
          docker_image: ubuntu:18.04
          apt_get: "gcc-5 g++-5"
          c_compiler: gcc-5
          cxx_compiler: g++-5
      - build:
          name: gcc7
          docker_image: ubuntu:18.04
          apt_get: "gcc-7 g++-7"
          c_compiler: gcc-7
          cxx_compiler: g++-7
      - build:
          name: clang6
          docker_image: ubuntu:18.04
          apt_get: "clang-6.0"
          c_compiler: clang-6.0
          cxx_compiler: clang++-6.0
      - build:
          name: gcc7-asan
          docker_image: ubuntu:18.04
          apt_get: "gcc-7 g++-7"
          c_compiler: gcc-7
          cxx_compiler: g++-7
          cmake_args: -DSANITIZE=address
      - build:
          name: gcc7-tsan
          docker_image: ubuntu:18.04
          apt_get: "gcc-7 g++-7"
          c_compiler: gcc-7
          cxx_compiler: g++-7
          cmake_args: -DSANITIZE=thread
      - bare_metal:
          name: bare-metal
          image: ubuntu-2004:202008-01
          apt_get: "gcc-7 g++-7"
          c_compiler: gcc-7
          cxx_compiler: g++-7
      - build_gpu:
          name: GPU (CUDA 9.2)
          cuda_version: "9.2"
          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on
          # multi GPU for lack of p2p capabilities.
          exclude_tests: "CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*"
      - build_gpu:
          name: GPU (CUDA 10.1)
          cuda_version: "10.1"
          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on
          # multi GPU for lack of p2p capabilities.
          exclude_tests: "CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*"
      - build_gpu:
          name: GPU (CUDA 10.2)
          cuda_version: "10.2"
          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on
          # multi GPU for lack of p2p capabilities.
          exclude_tests: "CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*"
      - build_gpu:
          name: GPU (CUDA 11.0)
          cuda_version: "11.0"
          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on
          # multi GPU for lack of p2p capabilities.
          exclude_tests: "CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*"
      - build_gpu:
          name: GPU (CUDA 11.1)
          cuda_version: "11.1"
          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on
          # multi GPU for lack of p2p capabilities, and CudaBasic/CudaMultiGPUChannelTestSuite.SendAcrossNonDefaultDevices/0
          # because it does not work with CUDA 11.1 (cf. https://github.com/pytorch/tensorpipe/issues/368).
          exclude_tests: "CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*:CudaBasic/CudaMultiGPUChannelTestSuite.SendAcrossNonDefaultDevices/0"
      - build_osx:
          name: OSX
      - python:
          name: python
          docker_image: ubuntu:18.04
          apt_get: "clang-6.0"
      - format:
          name: clang-format


================================================
FILE: .gitignore
================================================
*~
.DS_Store
/build/
/cmake-build-debug/


================================================
FILE: .gitmodules
================================================
[submodule "third_party/pybind11"]
	path = third_party/pybind11
	url = https://github.com/pybind/pybind11.git
[submodule "third_party/libuv"]
	path = third_party/libuv
	url = https://github.com/libuv/libuv.git
	branch = v1.x
[submodule "third_party/googletest"]
	path = third_party/googletest
	url = https://github.com/google/googletest.git
[submodule "third_party/libnop"]
	path = third_party/libnop
	url = https://github.com/google/libnop.git


================================================
FILE: CMakeLists.txt
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.18 FATAL_ERROR)

project(tensorpipe LANGUAGES C CXX)

set(CMAKE_CXX_STANDARD 17)

list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")

# Expose build options.
include(Options)

# Define sanitizer option, if specified.
include(Sanitize)

# Misc checks to cope with various compiler modes.
include(MiscCheck)

add_subdirectory(tensorpipe)

install(EXPORT TensorpipeTargets
        DESTINATION share/cmake/Tensorpipe
        FILE TensorpipeTargets.cmake)


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
  advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
  address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies within all project spaces, and it also applies when
an individual is representing the project or its community in public spaces.
Examples of representing a project or community include using an official
project e-mail address, posting via an official social media account, or acting
as an appointed representative at an online or offline event. Representation of
a project may be further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at <conduct@pytorch.org>. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to TensorPipe

We want to make contributing to this project as easy and transparent as
possible.

## Our Development Process

This project's source-of-truth is the version in Facebook's internal codebase,
which is continuously synced with the GitHub mirror using
[ShipIt](https://github.com/facebook/fbshipit). Pull requests on GitHub are
copied over using ImportIt (a companion tool for ShipIt).

## Pull Requests

We actively welcome your pull requests.

1. Fork the repo and create your branch from `main`.
2. If you've added code that should be tested, add tests.
3. If you've changed APIs, update the documentation.
4. Ensure the test suite passes.
5. Make sure your code lints.
6. If you haven't already, complete the Contributor License Agreement ("CLA").

## Contributor License Agreement ("CLA")

In order to accept your pull request, we need you to submit a CLA. You only
need to do this once to work on any of Facebook's open source projects.

Complete your CLA here: <https://code.facebook.com/cla>

## Issues

We use GitHub issues to track public bugs. Please ensure your description is
clear and has sufficient instructions to be able to reproduce the issue.

Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the
safe disclosure of security bugs. In those cases, please go through the process
outlined on that page and do not file a public issue.

## Coding Style

This source code is formatted using `clang-format`, with project-specific rules
recorded in the `.clang-format` file.

## License

By contributing to TensorPipe, you agree that your contributions will be
licensed under the LICENSE.txt file in the root directory of this source tree.


================================================
FILE: LICENSE.txt
================================================
BSD License

For TensorPipe software

Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

 * Redistributions of source code must retain the above copyright notice, this
   list of conditions and the following disclaimer.

 * Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

 * Neither the name Meta nor the names of its contributors may be used to
   endorse or promote products derived from this software without specific
   prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


================================================
FILE: README.md
================================================
# TensorPipe

The TensorPipe project provides a tensor-aware channel to transfer rich objects
from one process to another while using the fastest transport for the tensors
contained therein (e.g., CUDA device-to-device copy).

> :warning: Update (2025-12) tensorpipe is in maintenance mode and no new changes are planned beyond minimal build fixes. Please see https://github.com/meta-pytorch/torchcomms and https://github.com/meta-pytorch/monarch for alternatives.

## Getting started

First clone the repository:

```shell
$ git clone --recursive https://github.com/pytorch/tensorpipe
```

Then, build as follows (using ninja instead of make):

``` shell
$ cd tensorpipe
$ mkdir build
$ cd build
$ cmake ../ -GNinja
$ ninja
```

You can find test executables in `build/tensorpipe/test`.

## Interface

There are four classes you need to know about:

- `tensorpipe::Context`, which keeps track of the global state of the system,
  such as thread pools, open file descriptors, etc.
- `tensorpipe::Listener`, which allows one process to open an entry point for
  other processes to connect to.
- `tensorpipe::Pipe`, the one communication primitive that this entire project
  is about. You can obtain one either by connecting to the listener of another
  process or from such a listener when another process connects to it. Once you
  have a pipe, you can send messages on it, and that's the whole point.
- `tensorpipe::Message`, which is the language that pipes read and write in.
  Pipes are streams of structured messages (not just raw byte buffers), and a
  message is composed of a "core" payload (memory living on CPU) plus a list of
  tensors (memory living on any device, like GPUs).

Sending a message from one end of the pipe to the other can be achieved using
the `write` method, which takes a message (with the data to send) and a
callback which will be invoked once the sending has completed. This callback
will be invoked with an error (if one happened) and with the message.

Receiving a message takes two steps: on an incoming message, first the pipe
asks you to provide some memory to hold the message in, and then you ask the
pipe to read the data into that memory. In order to do this, first you must
register a callback that will be notified for incoming messages. This is
performed by calling the `readDescriptor` method with said callback. The
callback will be invoked with a so-called descriptor, which can be seen as a
"message skeleton", i.e., a message with no buffers attached to it (they are
set to null pointers). The job of this callback is filling in those buffers,
either by allocating the required memory or by obtaining it from somewhere else
(from a cache, as a slice of a batch that's being assembled, ...). This
descriptor also contains some metadata, given by the sender, which can be used
to provide allocation hints or any other information that can help the receiver
determine where to store the data. Once the message's buffers are ready, you
can tell the pipe to go ahead and fill them in with the incoming data by
passing the message to the `read` method, together with a callback which will
be called when all the data has been received and stored. As when writing, this
callback will be given a (possibly empty) error and the original message. The
`readDescriptor` callback is one-shot, which means that after it fires it
"expires" and will not be called again. It must be re-armed for a new event to
be received.

When you pass a message to the pipe, to send it or to receive into it, you must
not tamper with the underlying memory until the callback has completed, even if
the `write` or `read` call already returned. (The `write` and `read` calls, and
all other calls, are non-blocking so that it's easier to schedule asynchronous
parallel transfers without having to use threads). This means you cannot deallocate
the memory or alter it in any way, as the pipe may still be reading or
modifying it. In other terms, you relinquish control over the memory when you
pass a message to the pipe, only to reacquire it once the message is given back
to you in the callback. This contract is encoded by the requirement to move the
messages into and out of the pipe (using rvalue references). Also, because of
this agreement, all callbacks will always be called, even if the pipe is closed
or if it errors, in order to give back the memory.

The order in which messages are written to a pipe is preserved when these
messages are read on the other side. Moreover, for a given pipe endpoint, the
callbacks of the performed operations are executed in the same order that these
operations were scheduled, even if the operations are performed asynchronously
or out-of-band and thus may overlap or occur out of order. What this means is
that if two write operations are scheduled one after the other back-to-back,
even if the second one completes before the first one, its callback is delayed
until the first one also completes and its callback is invoked. The same
applies for reads. All the callbacks of all the pipes in a given context are
called from the same per-context thread and thus no two callbacks will occur at
the same time. However, different contexts will use different threads and their
callbacks may thus overlap.

All the callbacks are invoked with an error reference. This may be "empty",
i.e., indicate that no error has in fact occurred. In this case, the error
object evaluates to false. In case of an actual error it will instead evaluate
to true. When invoked with an error, the remaining arguments of the callback
may be meaningless. For the `read` and `write` callbacks they will still
contain the message that these methods will be invoked with, but the
`readDescriptor` one will be an empty or invalid message. It should not be
used.

There is no expectation for the `readDescriptor` callback to be armed at all
times. Similarly, it is not necessary to call the `read` method immediately
after a descriptor has been read. Both these possibilities are by design, in
order to allow the user of the pipe to apply some backpressure in case it's
receiving messages at a faster rate than it can handle, or for any other
reason. This backpressure will be propagated to the lower-level components, as
far down as possible (e.g., by stopping listening for readability events on
the socket file descriptor).

## Transports and channels

TensorPipe aims to be "backend-agnostic": it doesn't want to be restricted to a
single way of copying data around but wants to be able to choose the fastest
medium from a library of backends, based on the circumstances (e.g., are the two
processes on the same machine?) and on the available hardware (e.g., are the
GPUs connected with NVLink?). TensorPipe strives to have the largest selection
of backends, enabling users to implement specific backends for their systems
(should the default ones prove limited) and encouraging contributions.

The two processes that are establishing a pipe will automatically negotiate
during setup to determine which of the backends they have at their disposal can
be used and how well they would perform, in order to choose the best one in a
way that is completely transparent to the user.

Backends come in two flavors:

- Transports are the connections used by the pipes to transfer control messages,
  and the (smallish) core payloads. They are meant to be lightweight and
  low-latency. The most basic transport is a simple TCP one, which should work
  in all scenarios. A more optimized one, for example, is based on a ring buffer
  allocated in shared memory, which two processes on the same machine can use to
  communicate by performing just a memory copy, without passing through the
  kernel.

- Channels are where the heavy lifting takes place, as they take care of copying
  the (larger) tensor data. High bandwidths are a requirement. Examples include
  multiplexing chunks of data across multiple TCP sockets and processes, so as to
  saturate the NIC's bandwidth. Or using a CUDA memcpy call to transfer memory
  from one GPU to another using NVLink.

These different usage patterns promote different design choices when
implementing transports and channels, which means the two are not perfectly
interchangeable. For example, a TCP-based transport is best implemented using a
single connection, whereas a TCP-based channel will benefit from using multiple
connections, chunking and multiplexing the payload over them in order to saturate
the bandwidth even on the most powerful NICs.

Moreover, the APIs of transports and channels put different constraints on
them, which demand and permit different approaches. As a rule of thumb, we
require more from the transports: the only out-of-band information they can use
is a simple address, which is all they can use to bootstrap the connection, and
they need to include some "signaling" capabilities (a write on one side "wakes
up" the other side by causing a read). Channels, on the other hand, have much
looser requirements: they basically just need to implement a `memcpy` and, for
anything beyond that, they can leverage a transport that the pipe gives to them
for support.

## License

TensorPipe is BSD licensed, as found in the [LICENSE.txt](LICENSE.txt) file.


================================================
FILE: cmake/FindPackageHandleStandardArgs.cmake
================================================
# Copyright 2000-2020 Kitware, Inc. and Contributors
# All rights reserved.
#
# Distributed under the OSI-approved BSD 3-Clause License. See
# https://cmake.org/licensing for details.

#[=======================================================================[.rst:
FindPackageHandleStandardArgs
-----------------------------

This module provides a function intended to be used in :ref:`Find Modules`
implementing :command:`find_package(<PackageName>)` calls.  It handles the
``REQUIRED``, ``QUIET`` and version-related arguments of ``find_package``.
It also sets the ``<PackageName>_FOUND`` variable.  The package is
considered found if all variables listed contain valid results, e.g.
valid filepaths.

.. command:: find_package_handle_standard_args

  There are two signatures::

    find_package_handle_standard_args(<PackageName>
      (DEFAULT_MSG|<custom-failure-message>)
      <required-var>...
      )

    find_package_handle_standard_args(<PackageName>
      [FOUND_VAR <result-var>]
      [REQUIRED_VARS <required-var>...]
      [VERSION_VAR <version-var>]
      [HANDLE_COMPONENTS]
      [CONFIG_MODE]
      [FAIL_MESSAGE <custom-failure-message>]
      )

  The ``<PackageName>_FOUND`` variable will be set to ``TRUE`` if all
  the variables ``<required-var>...`` are valid and any optional
  constraints are satisfied, and ``FALSE`` otherwise.  A success or
  failure message may be displayed based on the results and on
  whether the ``REQUIRED`` and/or ``QUIET`` option was given to
  the :command:`find_package` call.

  The options are:

  ``(DEFAULT_MSG|<custom-failure-message>)``
    In the simple signature this specifies the failure message.
    Use ``DEFAULT_MSG`` to ask for a default message to be computed
    (recommended).  Not valid in the full signature.

  ``FOUND_VAR <result-var>``
    Obsolete.  Specifies either ``<PackageName>_FOUND`` or
    ``<PACKAGENAME>_FOUND`` as the result variable.  This exists only
    for compatibility with older versions of CMake and is now ignored.
    Result variables of both names are always set for compatibility.

  ``REQUIRED_VARS <required-var>...``
    Specify the variables which are required for this package.
    These may be named in the generated failure message asking the
    user to set the missing variable values.  Therefore these should
    typically be cache entries such as ``FOO_LIBRARY`` and not output
    variables like ``FOO_LIBRARIES``.

  ``VERSION_VAR <version-var>``
    Specify the name of a variable that holds the version of the package
    that has been found.  This version will be checked against the
    (potentially) specified required version given to the
    :command:`find_package` call, including its ``EXACT`` option.
    The default messages include information about the required
    version and the version which has been actually found, both
    if the version is ok or not.

  ``HANDLE_COMPONENTS``
    Enable handling of package components.  In this case, the command
    will report which components have been found and which are missing,
    and the ``<PackageName>_FOUND`` variable will be set to ``FALSE``
    if any of the required components (i.e. not the ones listed after
    the ``OPTIONAL_COMPONENTS`` option of :command:`find_package`) are
    missing.

  ``CONFIG_MODE``
    Specify that the calling find module is a wrapper around a
    call to ``find_package(<PackageName> NO_MODULE)``.  This implies
    a ``VERSION_VAR`` value of ``<PackageName>_VERSION``.  The command
    will automatically check whether the package configuration file
    was found.

  ``FAIL_MESSAGE <custom-failure-message>``
    Specify a custom failure message instead of using the default
    generated message.  Not recommended.

Example for the simple signature:

.. code-block:: cmake

  find_package_handle_standard_args(LibXml2 DEFAULT_MSG
    LIBXML2_LIBRARY LIBXML2_INCLUDE_DIR)

The ``LibXml2`` package is considered to be found if both
``LIBXML2_LIBRARY`` and ``LIBXML2_INCLUDE_DIR`` are valid.
Then also ``LibXml2_FOUND`` is set to ``TRUE``.  If it is not found
and ``REQUIRED`` was used, it fails with a
:command:`message(FATAL_ERROR)`, independent whether ``QUIET`` was
used or not.  If it is found, success will be reported, including
the content of the first ``<required-var>``.  On repeated CMake runs,
the same message will not be printed again.

Example for the full signature:

.. code-block:: cmake

  find_package_handle_standard_args(LibArchive
    REQUIRED_VARS LibArchive_LIBRARY LibArchive_INCLUDE_DIR
    VERSION_VAR LibArchive_VERSION)

In this case, the ``LibArchive`` package is considered to be found if
both ``LibArchive_LIBRARY`` and ``LibArchive_INCLUDE_DIR`` are valid.
Also the version of ``LibArchive`` will be checked by using the version
contained in ``LibArchive_VERSION``.  Since no ``FAIL_MESSAGE`` is given,
the default messages will be printed.

Another example for the full signature:

.. code-block:: cmake

  find_package(Automoc4 QUIET NO_MODULE HINTS /opt/automoc4)
  find_package_handle_standard_args(Automoc4  CONFIG_MODE)

In this case, a ``FindAutomoc4.cmake`` module wraps a call to
``find_package(Automoc4 NO_MODULE)`` and adds an additional search
directory for ``automoc4``.  Then the call to
``find_package_handle_standard_args`` produces a proper success/failure
message.
#]=======================================================================]

include(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)

# internal helper macro
# Internal helper: report a failure. A REQUIRED package aborts configuration
# with FATAL_ERROR; otherwise a STATUS message is printed unless the caller
# passed QUIET to find_package().
macro(_FPHSA_FAILURE_MESSAGE _msg)
  if(${_NAME}_FIND_REQUIRED)
    message(FATAL_ERROR "${_msg}")
  elseif(NOT ${_NAME}_FIND_QUIETLY)
    message(STATUS "${_msg}")
  endif()
endmacro()


# internal helper macro to generate the failure message when used in CONFIG_MODE:
# Internal helper: generate the failure message when used in CONFIG_MODE.
# Distinguishes three cases: a config file was found but some REQUIRED_VARS
# are missing; config files were considered but none had a suitable version;
# or no config file was found at all.
macro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)
  # <name>_CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found:
  if(${_NAME}_CONFIG)
    _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})")
  else()
    # If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.
    # List them all in the error message:
    if(${_NAME}_CONSIDERED_CONFIGS)
      set(configsText "")
      list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)
      math(EXPR configsCount "${configsCount} - 1")
      foreach(currentConfigIndex RANGE ${configsCount})
        list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)
        list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)
        # BUG FIX: interpolate the considered config file's path here. The
        # previous text "$(unknown)" was a garbled literal that printed junk
        # and left the `filename` variable fetched above unused.
        string(APPEND configsText "    ${filename} (version ${version})\n")
      endforeach()
      if (${_NAME}_NOT_FOUND_MESSAGE)
        string(APPEND configsText "    Reason given by package: ${${_NAME}_NOT_FOUND_MESSAGE}\n")
      endif()
      _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:\n${configsText}")

    else()
      # Simple case: No Config-file was found at all:
      _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake ${VERSION_MSG}")
    endif()
  endif()
endmacro()


function(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)
  # Processes the simple or extended signature documented at the top of this
  # module, checks REQUIRED_VARS / version / component constraints, sets
  # <PackageName>_FOUND and <PACKAGENAME>_FOUND in the caller's scope, and
  # prints the standard success or failure message.

# Set up the arguments for `cmake_parse_arguments`.
  set(options  CONFIG_MODE  HANDLE_COMPONENTS)
  set(oneValueArgs  FAIL_MESSAGE  VERSION_VAR  FOUND_VAR)
  set(multiValueArgs REQUIRED_VARS)

# Check whether we are in 'simple' or 'extended' mode:
  # If the first argument is one of the known keywords we are in extended
  # mode; otherwise it is the (DEFAULT_MSG|<custom-message>) of the simple
  # signature and all remaining arguments are required variables.
  set(_KEYWORDS_FOR_EXTENDED_MODE  ${options} ${oneValueArgs} ${multiValueArgs} )
  list(FIND _KEYWORDS_FOR_EXTENDED_MODE "${_FIRST_ARG}" INDEX)

  if(${INDEX} EQUAL -1)
    set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})
    set(FPHSA_REQUIRED_VARS ${ARGN})
    set(FPHSA_VERSION_VAR)
  else()
    cmake_parse_arguments(FPHSA "${options}" "${oneValueArgs}" "${multiValueArgs}"  ${_FIRST_ARG} ${ARGN})

    if(FPHSA_UNPARSED_ARGUMENTS)
      message(FATAL_ERROR "Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \"${FPHSA_UNPARSED_ARGUMENTS}\"")
    endif()

    if(NOT FPHSA_FAIL_MESSAGE)
      set(FPHSA_FAIL_MESSAGE  "DEFAULT_MSG")
    endif()

    # In config-mode, we rely on the variable <package>_CONFIG, which is set by find_package()
    # when it successfully found the config-file, including version checking:
    if(FPHSA_CONFIG_MODE)
      list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)
      list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)
      set(FPHSA_VERSION_VAR ${_NAME}_VERSION)
    endif()

    if(NOT FPHSA_REQUIRED_VARS)
      message(FATAL_ERROR "No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()")
    endif()
  endif()

# now that we collected all arguments, process them

  if("x${FPHSA_FAIL_MESSAGE}" STREQUAL "xDEFAULT_MSG")
    set(FPHSA_FAIL_MESSAGE "Could NOT find ${_NAME}")
  endif()

  # The first required variable's value is echoed in the success message.
  list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)

  string(TOUPPER ${_NAME} _NAME_UPPER)
  string(TOLOWER ${_NAME} _NAME_LOWER)

  # FOUND_VAR is obsolete: only the two canonical spellings are accepted, and
  # both are always set regardless, so the argument is effectively ignored.
  if(FPHSA_FOUND_VAR)
    if(FPHSA_FOUND_VAR MATCHES "^${_NAME}_FOUND$"  OR  FPHSA_FOUND_VAR MATCHES "^${_NAME_UPPER}_FOUND$")
      set(_FOUND_VAR ${FPHSA_FOUND_VAR})
    else()
      message(FATAL_ERROR "The argument for FOUND_VAR is \"${FPHSA_FOUND_VAR}\", but only \"${_NAME}_FOUND\" and \"${_NAME_UPPER}_FOUND\" are valid names.")
    endif()
  else()
    set(_FOUND_VAR ${_NAME_UPPER}_FOUND)
  endif()

  # collect all variables which were not found, so they can be printed, so the
  # user knows better what went wrong (#6375)
  set(MISSING_VARS "")
  set(DETAILS "")
  # check if all passed variables are valid
  set(FPHSA_FOUND_${_NAME} TRUE)
  foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})
    if(NOT ${_CURRENT_VAR})
      set(FPHSA_FOUND_${_NAME} FALSE)
      string(APPEND MISSING_VARS " ${_CURRENT_VAR}")
    else()
      # DETAILS accumulates the found values; FIND_PACKAGE_MESSAGE uses it to
      # decide whether the result changed and a new message should be printed.
      string(APPEND DETAILS "[${${_CURRENT_VAR}}]")
    endif()
  endforeach()
  if(FPHSA_FOUND_${_NAME})
    set(${_NAME}_FOUND TRUE)
    set(${_NAME_UPPER}_FOUND TRUE)
  else()
    set(${_NAME}_FOUND FALSE)
    set(${_NAME_UPPER}_FOUND FALSE)
  endif()

  # component handling
  unset(FOUND_COMPONENTS_MSG)
  unset(MISSING_COMPONENTS_MSG)

  if(FPHSA_HANDLE_COMPONENTS)
    foreach(comp ${${_NAME}_FIND_COMPONENTS})
      if(${_NAME}_${comp}_FOUND)

        if(NOT DEFINED FOUND_COMPONENTS_MSG)
          set(FOUND_COMPONENTS_MSG "found components: ")
        endif()
        string(APPEND FOUND_COMPONENTS_MSG " ${comp}")

      else()

        if(NOT DEFINED MISSING_COMPONENTS_MSG)
          set(MISSING_COMPONENTS_MSG "missing components: ")
        endif()
        string(APPEND MISSING_COMPONENTS_MSG " ${comp}")

        # Only a *required* missing component makes the whole package not-found.
        if(${_NAME}_FIND_REQUIRED_${comp})
          set(${_NAME}_FOUND FALSE)
          string(APPEND MISSING_VARS " ${comp}")
        endif()

      endif()
    endforeach()
    set(COMPONENT_MSG "${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}")
    string(APPEND DETAILS "[c${COMPONENT_MSG}]")
  endif()

  # version handling:
  set(VERSION_MSG "")
  set(VERSION_OK TRUE)

  # check with DEFINED here as the requested or found version may be "0"
  if (DEFINED ${_NAME}_FIND_VERSION)
    if(DEFINED ${FPHSA_VERSION_VAR})
      set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})

      if(${_NAME}_FIND_VERSION_EXACT)       # exact version required
        # count the dots in the version string
        string(REGEX REPLACE "[^.]" "" _VERSION_DOTS "${_FOUND_VERSION}")
        # add one dot because there is one dot more than there are components
        string(LENGTH "${_VERSION_DOTS}." _VERSION_DOTS)
        if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)
          # The found version has more components than requested: truncate it
          # so that e.g. found "1.2.3" can satisfy EXACT "1.2".
          # Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT
          # is at most 4 here. Therefore a simple lookup table is used.
          if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)
            set(_VERSION_REGEX "[^.]*")
          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)
            set(_VERSION_REGEX "[^.]*\\.[^.]*")
          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)
            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*")
          else ()
            set(_VERSION_REGEX "[^.]*\\.[^.]*\\.[^.]*\\.[^.]*")
          endif ()
          string(REGEX REPLACE "^(${_VERSION_REGEX})\\..*" "\\1" _VERSION_HEAD "${_FOUND_VERSION}")
          unset(_VERSION_REGEX)
          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)
            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
            set(VERSION_OK FALSE)
          else ()
            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
          endif ()
          unset(_VERSION_HEAD)
        else ()
          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)
            set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is exact version \"${${_NAME}_FIND_VERSION}\"")
            set(VERSION_OK FALSE)
          else ()
            set(VERSION_MSG "(found suitable exact version \"${_FOUND_VERSION}\")")
          endif ()
        endif ()
        unset(_VERSION_DOTS)

      else()     # minimum version specified:
        if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)
          set(VERSION_MSG "Found unsuitable version \"${_FOUND_VERSION}\", but required is at least \"${${_NAME}_FIND_VERSION}\"")
          set(VERSION_OK FALSE)
        else ()
          set(VERSION_MSG "(found suitable version \"${_FOUND_VERSION}\", minimum required is \"${${_NAME}_FIND_VERSION}\")")
        endif ()
      endif()

    else()

      # if the package was not found, but a version was given, add that to the output:
      if(${_NAME}_FIND_VERSION_EXACT)
         set(VERSION_MSG "(Required is exact version \"${${_NAME}_FIND_VERSION}\")")
      else()
         set(VERSION_MSG "(Required is at least version \"${${_NAME}_FIND_VERSION}\")")
      endif()

    endif()
  else ()
    # Check with DEFINED as the found version may be 0.
    if(DEFINED ${FPHSA_VERSION_VAR})
      set(VERSION_MSG "(found version \"${${FPHSA_VERSION_VAR}}\")")
    endif()
  endif ()

  if(VERSION_OK)
    string(APPEND DETAILS "[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]")
  else()
    set(${_NAME}_FOUND FALSE)
  endif()


  # print the result:
  if (${_NAME}_FOUND)
    FIND_PACKAGE_MESSAGE(${_NAME} "Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}" "${DETAILS}")
  else ()

    if(FPHSA_CONFIG_MODE)
      _FPHSA_HANDLE_FAILURE_CONFIG_MODE()
    else()
      if(NOT VERSION_OK)
        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})")
      else()
        _FPHSA_FAILURE_MESSAGE("${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) ${VERSION_MSG}")
      endif()
    endif()

  endif ()

  # Propagate the result to the caller's scope; both spellings are always set
  # for compatibility (see the FOUND_VAR documentation above).
  set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
  set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)
endfunction()


================================================
FILE: cmake/FindPackageMessage.cmake
================================================
# Copyright 2000-2020 Kitware, Inc. and Contributors
# All rights reserved.
#
# Distributed under the OSI-approved BSD 3-Clause License. See
# https://cmake.org/licensing for details.

#.rst:
# FindPackageMessage
# ------------------
#
#
#
# FIND_PACKAGE_MESSAGE(<name> "message for user" "find result details")
#
# This macro is intended to be used in FindXXX.cmake modules files.  It
# will print a message once for each unique find result.  This is useful
# for telling the user where a package was found.  The first argument
# specifies the name (XXX) of the package.  The second argument
# specifies the message to display.  The third argument lists details
# about the find result so that if they change the message will be
# displayed again.  The macro also obeys the QUIET argument to the
# find_package command.
#
# Example:
#
# ::
#
#   if(X11_FOUND)
#     FIND_PACKAGE_MESSAGE(X11 "Found X11: ${X11_X11_LIB}"
#       "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]")
#   else()
#    ...
#   endif()

function(FIND_PACKAGE_MESSAGE pkg msg details)
  # Print `msg` once per unique find result for package `pkg`. The `details`
  # string is cached; while it stays the same on later CMake runs, the message
  # is suppressed. Obeys the QUIET argument given to find_package().
  if(${pkg}_FIND_QUIETLY)
    return()
  endif()

  # Normalize details to a single line before comparing against the cache.
  string(REPLACE "\n" "" details "${details}")
  set(_cache_var FIND_PACKAGE_MESSAGE_DETAILS_${pkg})
  if("${details}" STREQUAL "${${_cache_var}}")
    # Same result as last time; the message was already shown.
    return()
  endif()

  message(STATUS "${msg}")
  # Remember this result so the message is not repeated on the next run.
  set("${_cache_var}" "${details}"
    CACHE INTERNAL "Details about finding ${pkg}")
endfunction()


================================================
FILE: cmake/Finduv.cmake
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#
# Finduv
# ------
#
# Imported Targets
# ^^^^^^^^^^^^^^^^
#
# An imported target named ``uv::uv`` is provided if libuv has been found.
#
# Result Variables
# ^^^^^^^^^^^^^^^^
#
# This module defines the following variables:
#
# ``uv_FOUND``
#   True if libuv was found, false otherwise.
# ``uv_LIBRARY_DIRS``
#   The path(s) to uv libraries.
# ``uv_VERSION``
#   The version of libuv found.
#

find_package(PkgConfig QUIET)

# Prefer a system-provided libuv (located via pkg-config) unless the user
# explicitly asked to build the bundled submodule with TP_BUILD_LIBUV.
if((NOT TP_BUILD_LIBUV) AND PkgConfig_FOUND)
  pkg_check_modules(uv QUIET IMPORTED_TARGET GLOBAL libuv)
  if(uv_FOUND)
    add_library(uv::uv ALIAS PkgConfig::uv)
  endif()
endif()

if(NOT uv_FOUND)
  # Fall back to building libuv from the third_party submodule.
  set(uv_VERSION "1.51.0")
  set(uv_LIBRARY_DIRS "submodule")

  set(libuv_DIR ${PROJECT_SOURCE_DIR}/third_party/libuv)
  add_subdirectory(${libuv_DIR}
    ${PROJECT_BINARY_DIR}/third_party/libuv
    EXCLUDE_FROM_ALL)

  # This hack duplicates the `uv_a` target, so that we can call
  # install(TARGETS ... EXPORT) on it, which is not possible when the target is
  # defined in a subdirectory in CMake 3.5.
  get_target_property(_uv_sources uv_a SOURCES)
  set(_uv_sources_abs)
  foreach(_uv_src ${_uv_sources})
    list(APPEND _uv_sources_abs "${libuv_DIR}/${_uv_src}")
  endforeach()

  add_library(tensorpipe_uv STATIC ${_uv_sources_abs})
  if(BUILD_SHARED_LIBS)
    set_target_properties(tensorpipe_uv PROPERTIES POSITION_INDEPENDENT_CODE 1)
  endif()

  # Mirror uv_a's build settings onto the duplicated target.
  # BUG FIX: get_target_property() returns "<var>-NOTFOUND" when a property is
  # unset; previously that bogus token would have been passed verbatim to the
  # target_* commands below. Guard each copy so unset properties are skipped.
  get_target_property(_link_libs uv_a LINK_LIBRARIES)
  if(_link_libs)
    target_link_libraries(tensorpipe_uv PRIVATE ${_link_libs})
  endif()

  get_target_property(_include_dirs uv_a INCLUDE_DIRECTORIES)
  if(_include_dirs)
    target_include_directories(tensorpipe_uv PRIVATE ${_include_dirs})
  endif()
  target_include_directories(tensorpipe_uv PUBLIC $<BUILD_INTERFACE:${libuv_DIR}/include>)

  get_target_property(_compile_definitions uv_a COMPILE_DEFINITIONS)
  if(_compile_definitions)
    target_compile_definitions(tensorpipe_uv PRIVATE ${_compile_definitions})
  endif()

  get_target_property(_compile_options uv_a COMPILE_OPTIONS)
  if(_compile_options)
    target_compile_options(tensorpipe_uv PRIVATE ${_compile_options})
  endif()

  install(TARGETS tensorpipe_uv
          EXPORT TensorpipeTargets
          ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR})

  add_library(uv::uv ALIAS tensorpipe_uv)
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(uv
  REQUIRED_VARS uv_VERSION
  VERSION_VAR uv_VERSION)


================================================
FILE: cmake/MiscCheck.cmake
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

include(CheckCXXSourceCompiles)
include(CMakePushCheckState)

# We use the [[nodiscard]] attribute, which GCC 5 complains about.
# Silence this warning if GCC 5 is used.
# Suppress the attribute warning only when compiling with GCC older than 6.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
   CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6)
  add_definitions("-Wno-attributes")
endif()


================================================
FILE: cmake/Options.cmake
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Expose a LINUX flag mirroring whether the target system is Linux.
set(LINUX OFF)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  set(LINUX ON)
endif()

# Declare a boolean cache option `name` (described by `docstring`) whose
# default is computed from `condition`. If the user explicitly forces the
# option ON while the condition evaluates false, configuration aborts,
# because the backend cannot actually be enabled in that environment.
macro(TP_CONDITIONAL_BACKEND name docstring condition)
  # No clue why this monstrosity is needed. But cmake_dependent_option has it,
  # and the code doesn't seem to work without it.
  # (The replace turns the space-separated condition string into a ;-list so
  # that if() sees separate condition tokens rather than one quoted word.)
  string(REGEX REPLACE " +" ";" TP_CONDITIONAL_BACKEND_CONDITION "${condition}")
  if(${TP_CONDITIONAL_BACKEND_CONDITION})
    set(TP_CONDITIONAL_BACKEND_CAN_ENABLE ON)
  else()
    set(TP_CONDITIONAL_BACKEND_CAN_ENABLE OFF)
  endif()
  # The computed value only serves as the *default*; a pre-existing cache
  # entry (e.g. set by the user on the command line) wins.
  set(${name} ${TP_CONDITIONAL_BACKEND_CAN_ENABLE} CACHE BOOL ${docstring})
  if(${name} AND NOT ${TP_CONDITIONAL_BACKEND_CAN_ENABLE})
    message(FATAL_ERROR "${name} was explicitly set, but that can't be honored")
  endif()
endmacro()

# Try to auto-detect the presence of some libraries in order to enable/disable
# the transports/channels that make use of them.
# TODO Add CUDA to this list, in order to fix the TODO below

# Whether to build the CUDA-aware transports/channels (requires a CUDA toolkit).
# TODO: Default to ON if CUDA available.
option(TP_USE_CUDA "Enable support for CUDA tensors" OFF)

# Optional features (all off by default; they add build targets only).
option(TP_BUILD_BENCHMARK "Build benchmarks" OFF)
option(TP_BUILD_MISC "Build misc tools" OFF)
option(TP_BUILD_PYTHON "Build python bindings" OFF)
option(TP_BUILD_TESTING "Build tests" OFF)

# Whether to build a static or shared library; derived from the standard
# BUILD_SHARED_LIBS switch and cached so add_library() calls can use it.
if(BUILD_SHARED_LIBS)
  set(TP_STATIC_OR_SHARED SHARED CACHE STRING "")
else()
  set(TP_STATIC_OR_SHARED STATIC CACHE STRING "")
endif()
mark_as_advanced(TP_STATIC_OR_SHARED)

# Force to build libuv from the included submodule (skips the pkg-config probe
# in Finduv.cmake even when a system libuv is available).
option(TP_BUILD_LIBUV "Build libuv from source" OFF)

# Installation directories, defaulting to the GNU conventions.
include(GNUInstallDirs)
set(TP_INSTALL_LIBDIR ${CMAKE_INSTALL_LIBDIR} CACHE STRING "Directory in which to install libraries")
mark_as_advanced(TP_INSTALL_LIBDIR)
set(TP_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE STRING "Directory in which to install public headers")
mark_as_advanced(TP_INSTALL_INCLUDEDIR)


================================================
FILE: cmake/Sanitize.cmake
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# When SANITIZE is set (e.g. "address", "thread"), compile everything with the
# requested sanitizer and keep frame pointers for usable stack traces.
if(SANITIZE)
  add_definitions("-fsanitize=${SANITIZE}")
  add_definitions("-fno-omit-frame-pointer")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=${SANITIZE}")
  # BUG FIX: the sanitizer runtime must also be linked into shared libraries;
  # with BUILD_SHARED_LIBS the previous code only instrumented executables'
  # link lines, causing undefined sanitizer symbols at shared-library link time.
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=${SANITIZE}")
endif()


================================================
FILE: docs/cuda_gotchas.md
================================================
# CUDA gotchas

While implementing CUDA channels we hit some undocumented "quirks" which forced us to adapt our original designs. We collect them here for future reference (although this list may not be exhaustive). Please add more items whenever we learn new things "the hard way". We’re mostly interested in unexpected behaviors that could entail substantial design changes, although smaller technical pitfalls are welcome too.

## Most functions initialize a context on the current device

A lot of CUDA functions cause a CUDA context to be initialized for the "current" device (which is a thread-local variable managed by CUDA). This consumes on-device memory (plus it can cause deadlocks when combined with NCCL). By invoking CUDA functions without first explicitly setting the current device we risk accidentally initializing CUDA contexts on devices on which we weren’t supposed to (especially device 0, since it’s the "default"). In order to avoid this, a device guard should be used for *all* operations. They are very cheap, hence don’t be shy! At times it’s not clear which device should be used in such guard, for example during initialization, however we must only use devices that the user has explicitly provided, hence we may have to lazily delay initialization in those cases.

## Querying the device of a pointer can fail

By choice, TensorPipe doesn’t ask users to provide the device index when they pass in a CUDA pointer, for simplicity, since it would be redundant as the device index can be extracted from the pointer. This "extraction" is thus the only CUDA operation for which we can’t possibly set up a device guard. This has proven to be a problem because, due to a bug in CUDA, the extraction would fail if the current device had been *explicitly* set to an invalid (uninitialized) device. (A default "unset" current device would work). This occurred often, because if we used a device guard when the current device was unset, its destructor would explicitly reset the current device to 0. Our investigation seemed to show that an unset current device in the CUDA runtime corresponded to a null current context in the CUDA driver, whereas an invalid current device corresponded to an invalid non-null context. Thus our workaround was to use the driver API directly and first reset its current context to null (in a sense, use a "reverse" device guard, which temporarily "unsets" the current device).

## Releasing shared resources implicitly synchronizes

Some CUDA operations perform an implicit device synchronization: they block the CPU thread until the GPU "catches up", that is, it waits for *all* previously-launched kernels for that device (on any stream) to complete. Such functions also cause later kernels (enqueued by another concurrent thread) to delay their launch on the device until the blocking function returns (we’ve occasionally been calling this a "kernel fence"). This is bad because it would mean that an internal TensorPipe operation can interfere with the user’s scheduling of kernels and thus degrade GPU utilization. The [CUDA programming guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#implicit-synchronization) mentions such a behavior (in section 3.2.6.5.4), however we’ve found out that the list of circumstances where this occurs is incomplete and incorrect. As a rule of thumb, we’ve seen this behavior happen mainly when *releasing* a resource shared among kernels (e.g., device memory, pinned host memory, IPC memory handles), as if CUDA wanted to ensure there were no kernels using this resource anymore before freeing it. A mental model could be to imagine that kernels acquire a shared lock to it, while freeing it needs a unique lock. The only solution to this limitation is to allocate a pool of these resources at the beginning and reuse them.

## Creating IPC events deadlocks

Another CUDA bug we hit was that the creation of CUDA events with the interprocess flag would sometimes deadlock. [Here’s a (not so small) repro](https://gist.github.com/lw/f34836416e7674bbdda8b4925c2999f2). We couldn’t pin it down to a specific condition, or to a race with another call. NVIDIA confirmed the bug and supposedly fixed it in version 450 of the CUDA driver. Since we still need to support earlier versions, as a workaround we’re taking great care to create all our IPC events as early as possible (hoping to avoid whatever races) and reuse them.

## Memory won’t be freed if there’s open IPC handles to it

Imagine that process B has received and opened an IPC handle to some device memory allocated and owned by process A, and process A frees this memory without B first closing its handle to it. The CUDA doc described this as undefined behavior (hence we can’t complain), but in practice what we’ve observed is that the memory will *not* be freed, that is, it will not be reused for subsequent allocation requests, thus possibly causing OOMs. In a sense, it’s as if that memory were "leaked". This is displayed rather confusingly in `nvidia-smi`’s accounting: the memory appears as occupied in the device statistics, but no process appears to be responsible for it.

## Cannot open same IPC handle more than once

There’s a limitation in older versions of CUDA where, if process A allocates some memory, only *one* binding to it can be opened in process B using IPC handles. Attempting to re-open the same handle a second time will fail. Note that one cannot get multiple "different" handles for the same memory, as CUDA always returns the same one. In practice it means that the user could pass some memory for TensorPipe for which it has already manually created and shared a handle, thus it’s unsafe for TensorPipe to also get and open a handle. We can only safely do it for private memory that we’re managing ourselves. Also note that this limitation was lifted in CUDA 11.1.

## The pointer for an opened IPC handle could be "offset" wrt the source pointer

The CUDA doc on this is clear albeit cryptic: given a pointer, CUDA returns the IPC handle for its *allocation*. Hence if we allocate some memory at address p0 and ask for the IPC handle of address p1 = p0 + offset, we’ll get the IPC handle for p0! This means that when we open the handle we need to add back that offset. Luckily CUDA offers a function to query p0 given p1. Note that this situation happens a lot in PyTorch due to the caching allocator sometimes returning slices from larger blocks.

## Not all pairs of GPUs can access each other’s memory

Device to device (D2D) transfers are supported by CUDA only when peer-to-peer (P2P) capabilities exist between the two GPUs. This is handled transparently by CUDA, which will automatically select the most performant direct link. Concretely, it will use NVLink, but only if there’s a dedicated "cable" connecting those two devices. If the NVLink mesh is not a complete graph (as is often the case, e.g., hybrid-cube meshes (HCM) are very common), for the missing pairs CUDA will use PCIe transfers, but only if the two devices are attached to the same chipset/controller/host bridge. If there are multiple chipsets (which is also common, e.g., the DGX machines have two), then D2D transfers between some pairs of GPUs might just not be possible through CUDA! In principle this is easy enough to detect since CUDA offers a function for it (and `nvidia-smi topo` also displays it), however we can’t use it if the two devices aren’t both "visible" to the process (we’re referring to the `CUDA_VISIBLE_DEVICES` environment variable). For such cases the only option is to use the NVML library, which doesn’t honor that env var, but in turn adds the complexity of matching corresponding devices between CUDA and NVML (which is best done through their UUID). Moreover, additional complexity was required in TensorPipe to handle the case where some but not all pairs of GPUs between two processes supported P2P.

## Registering CUDA memory with IB is slow

This is kinda known, but it’s better to repeat it: the registration and deregistration of memory with InfiniBand is considered a "setup" step, and is very slow, and should thus be avoided as much as possible during the "hot" data path, for example using a staging area or by caching these registrations.

## Registering CUDA memory with IB requires an extra NVIDIA kernel module

When we pass a pointer to InfiniBand for registration, InfiniBand needs to understand that this virtual address points to CUDA device memory and not to some CPU memory. For that it needs to be aware of CUDA, and it does so through so-called "peer memory client", which NVIDIA provides (through a separate kernel module) and registers with InfiniBand, and which is queried by InfiniBand before "falling back" to assuming the pointer points to CPU memory. This peer memory client feature is only available in Mellanox’s InfiniBand distribution (called OFED, OpenFabrics Enterprise Distribution), and not in vanilla upstream InfiniBand. On the client side (our side) luckily nothing changes in the API.

## Registering CUDA memory with IB occupies the PCIe window

Each PCIe device has a handful of "memory windows" it exposes, through which the host or other devices can access and modify the device’s memory (both to issue commands and to send/retrieve data). These are called BARs (base address registers). In the case of NVIDIA GPUs the BAR that appears to map to the device’s main memory is BAR1. This is often sized much smaller than the memory itself (say, 256MB for a 16GB GPU), with the idea that it will just be used as a staging area. Also note that CUDA already reserves a few dozen MBs of that window. When registering CUDA device memory with InfiniBand, an additional mapping is created in that window (during the `ibv_reg_mr` call) and will thus fail if the window doesn’t have enough remaining space (e.g., if the buffer being registered is larger than the window). This means we can’t straightforwardly register the user-provided buffers. However, with the right combination of GPU and of CPU BIOS, the BAR1 can become as large as the GPU’s main memory itself, in which case this won’t be a problem anymore.

## Registering CUDA memory with IB doesn’t leak it

Contrary to IPC handles, freeing CUDA device memory while it’s still registered with InfiniBand does not appear to interfere with the deallocation, hence the memory will correctly become reusable.

## IB messages have a maximum size

Each send/recv operation over InfiniBand can only handle up to a certain amount of data, usually at least 1GB, and will fail for larger amounts. This limit can be queried on the device, and chunking must be used for larger sizes.

It appears that, at least on some NICs and with some drivers, there's also a "minimum size" of 32 bytes, with messages failing with odd errors for smaller sizes. It's still unclear whether it's a bug.

## GPUs need to be matched with the right IB NIC

On some machine types there may be multiple GPUs and multiple InfiniBand devices and they need to be carefully matched. Using the same IB NIC for all GPUs will introduce a bottleneck while leaving all other NICs unused. Matching them up "randomly" means that the data paths over PCIe of different GPU-NIC pairs might cross each other (thus, again, causing a bottleneck), might traverse the host, or otherwise interfere. These machines are usually set up so that each GPU has one NIC that it’s "naturally" closest to, for example they share the same PCIe switch, thus we need a logic to be able to detect and implement this.


================================================
FILE: docs/development.md
================================================
# Development

TensorPipe uses CMake for its build system.

## Dependencies

To build TensorPipe, you need:

* C++14 compatible compiler (GCC >= 5.5 or Clang >= 6)

## Clone the repository

Example:

``` shell
git clone --recursive https://github.com/pytorch/tensorpipe
```

If you have updated an already cloned repository, make sure that the
submodules are up to date:

``` shell
git submodule sync
git submodule update --init
```

It is imperative to check out the submodules before running CMake.

Find the list of submodules and a description of what they're used for
on [this page][third_party].

[third_party]: https://github.com/pytorch/tensorpipe/tree/main/third_party

## Using CMake

Example:

``` shell
mkdir build
cd build
cmake ../ -DCMAKE_BUILD_TYPE=Debug -DSANITIZE=thread
make
```

You can specify CMake variables by passing them as arguments to the `cmake` command.

Useful CMake variables:

* `CMAKE_C_COMPILER` -- Define which C compiler to use.
* `CMAKE_CXX_COMPILER` -- Define which C++ compiler to use.
* `CMAKE_C_FLAGS` -- Additional flags for the C compiler.
* `CMAKE_CXX_FLAGS` -- Additional flags for the C++ compiler.
* `CMAKE_BUILD_TYPE` -- For example: `release`, `debug`.

Useful TensorPipe specific variables:

* `SANITIZE` -- configure the sanitizer to use (if any); for
  example: `address` or `thread`, to run with `asan` or `tsan`,
  respectively.

## Ninja

To make CMake output something other than the default `Makefile`, see
[`cmake-generators(7)`][cmake-generators]. We like to use the
[Ninja][ninja] generator because it works well for incremental builds.
On the command line, specify `-GNinja` to use it.

[cmake-generators]: https://cmake.org/cmake/help/v3.4/manual/cmake-generators.7.html
[ninja]: https://en.wikipedia.org/wiki/Ninja_(build_system)


================================================
FILE: docs/linux_support.md
================================================
This document is intended for developers and advanced users. It’s the kind of document that risks going out of date very quickly, hence take it with a grain of salt.

In order to try to be as performant as possible, TensorPipe sometimes relies on new and advanced kernel features. This is causing issues to users who are building and/or running on old kernels. Hence, whenever we use such features, we should always “guard” them somehow, i.e., detect their availability at compile-time or (preferably) at runtime, and disable the backend or mark it non-viable. It is ok-ish for users with old kernels to not have access to all backends, as long as there’s always at least one backend they can use.

## Compile-time vs runtime, Linux vs glibc

Unfortunately, both the kernel version used for building and the one used for running affect whether we can use a feature. This means that the availability of a function or flag during build doesn’t mean it will be supported at runtime (this is especially true for the official builds of PyTorch). On the other hand, it also means that even if the runtime kernel supports a feature, we may not be able to use it because we didn’t have access to a system header when building (e.g., to get a flag). While sometimes we can “polyfill” this information, it’s not always doable.

An additional complication is added by the fact that we typically access syscalls through their glibc wrappers. First of all, this means we only get access to a syscall once glibc wraps it, which could happen years later. But it also means we link to a glibc symbol, and thus to a specific version of glibc’s shared object. With the kernel, using an unsupported feature results in a runtime error when first used, which we can catch; but with glibc we get a loader error due to missing symbols at startup, even if the user doesn’t use TensorPipe, even if we could “tolerate” these symbols’ absence. It is thus desirable at times to avoid the glibc wrappers.

## Common tricks for how to guard/polyfill

* Kernel flags are typically defined as preprocessor flags (i.e., `#define FOO`). This is stuff like `O_TMPFILE`, `MAP_SHARED_VALIDATE`, `PR_SET_PTRACER`, ... It’s easy to detect this in the code, with a `#ifdef FOO`, and since these flags are (usually?) constants, it’s also easy to define them ourselves. This “polyfill” allows us to build on an old kernel but still run on a new one.
* For a new-ish syscall, we probably don’t want to use the glibc wrapper, for the problems described above, and because it’s hard to detect its availability (the best option is a CMake check whose result we inject as a preprocessor flag). An alternative is to invoke it through the generic `syscall` syscall, using the `SYS_foo` flags. This could bring a few issues on its own (especially for 32bit systems) but for now it hasn’t come to bite us. This way we skip glibc entirely, and simply end up getting ENOSYS if the runtime kernel doesn’t support the syscall. Those `SYS_foo` flags are defined by glibc, but it seems glibc defines them automatically for all the syscalls it “finds” in the kernel, and not just for the syscalls that glibc supports. Unfortunately we cannot “polyfill” the `SYS_foo` flags if we don’t find them, because they have different values on different architectures.

## What do others do?

Since [Apr 2017](https://github.com/libuv/libuv/commit/4e6101388015c6d0879308d566f0a4b79edc0c13), libuv only supports Linux 2.6.32 (December 2009) and glibc 2.12 (May 2010). (This doesn’t mean earlier versions are necessarily broken, but that libuv reserves the right to break them). Libuv seems to be somewhat tied to the RedHat/CentOS releases, which are common and have a very long lifespan. It doesn’t make sense for us to support older versions than what libuv does, because if libuv decides to break them there’s nothing we can do.

PyTorch tries to support the [manylinux2014 platform](https://www.python.org/dev/peps/pep-0599/) (defined by Python for use in PyPI/pip), which allows up to glibc 2.17 (December 2012). However, it’s not clear if we’re there yet, and the previous version is `manylinux2010` which comes with glibc 2.12.

Hence a reasonable recommendation seems to be to draw the line at Linux 2.6.32 and glibc 2.12. However, people with older versions than those have already reported issues and asked for fixes, which we can probably consider on a case-by-case basis.

## Kernel features used by TensorPipe

### Linux 2.1.4 (October 1996)

* The `getresuid` and `getresgid` syscalls.

### Linux 2.3.16 (September 1999)

* The `/proc/sys/kernel/random/boot_id` file. See `random(4)`.

  No git hash as it predates the use of git by Linux

  https://github.com/torvalds/linux/blob/1da177e4c3f41524e886b7f1b8a0c1fc7321cac2/drivers/char/random.c#L1270-L1278

### Linux 2.3.20 (October 1999)

* The `PR_GET_DUMPABLE` flag for `prctl`.

  No git hash as it predates the use of git by Linux

  https://github.com/torvalds/linux/blob/1da177e4c3f41524e886b7f1b8a0c1fc7321cac2/include/linux/prctl.h#L10

### Linux 2.6.26 (July 2008)

* Version 3 of Linux capabilities. (Initial capability support, including the `capget` syscall, dates back to Linux 2.1.100, from May 1998). See `capget(2)`.

  https://github.com/torvalds/linux/commit/ca05a99a54db1db5bca72eccb5866d2a86f8517f

### Linux 3.2 (January 2012)

* Cross-Memory Attach (i.e., the `process_vm_readv` syscall). See `process_vm_readv(2)`.

  https://github.com/torvalds/linux/commit/fcf634098c00dd9cd247447368495f0b79be12d1

### Linux 3.4 (May 2012)

* The YAMA security module, and thus the `/proc/sys/kernel/yama/ptrace_scope` file. This includes the `PR_SET_PTRACER` and the `PR_SET_PTRACER_ANY` flags for `prctl`. See `ptrace(2)`.

  https://github.com/torvalds/linux/commit/2d514487faf188938a4ee4fb3464eeecfbdcf8eb
  https://github.com/torvalds/linux/commit/bf06189e4d14641c0148bea16e9dd24943862215

### Linux 3.8 (February 2013)

* The `/proc/[pid]/ns/[ns]` files. Although that directory, and the `net` file therein, were already present in 3.0, the `pid` and `user` ones only arrived in 3.8 and, more importantly, the ability to identify a namespace by the inode number of those files came in 3.8 (when they stopped being hardlinks and became symlinks). See `proc(5)` and `namespaces(7)` and others.

  https://github.com/torvalds/linux/commit/6b4e306aa3dc94a0545eb9279475b1ab6209a31f
  https://github.com/torvalds/linux/commit/13b6f57623bc485e116344fe91fbcb29f149242b
  https://github.com/torvalds/linux/commit/57e8391d327609cbf12d843259c968b9e5c1838f
  https://github.com/torvalds/linux/commit/cde1975bc242f3e1072bde623ef378e547b73f91
  https://github.com/torvalds/linux/commit/bf056bfa80596a5d14b26b17276a56a0dcb080e5
  https://github.com/torvalds/linux/commit/98f842e675f96ffac96e6c50315790912b2812be

### Linux 3.11 (September 2013)

* The `O_TMPFILE` flag for `open`. See `open(2)`.

  https://github.com/torvalds/linux/commit/60545d0d4610b02e55f65d141c95b18ccf855b6e

### Linux 3.17 (October 2014)

* The `memfd_create` syscall. See `memfd_create(2)`.

  https://github.com/torvalds/linux/commit/9183df25fe7b194563db3fec6dc3202a5855839c

### Linux 4.11 (April 2017)

* The `/sys/kernel/security/lsm` file in `securityfs` (a list of active Linux Security Modules).

  https://github.com/torvalds/linux/commit/d69dece5f5b6bc7a5e39d2b6136ddc69469331fe

### TODO

* All that sysfs PCIe stuff done by CUDA GDR (e.g., resolving GPUs and NICs to PCIe paths, getting the BAR1 size, ...), plus checking the nv_mem_peer module

## Glibc features required by TensorPipe

### Glibc 2.2.5 (January 2002)

* The `capget` function.

### Glibc 2.3.3 (December 2003)

* The `dlinfo` function. (All of `dlopen`, `dlclose`, `dlsym` and `dlerror` were present since at least glibc 2.0).

### Glibc 2.12 (May 2010)

* The `pthread_setname_np` function.


================================================
FILE: docs/shm.md
================================================
# The shm transport

This document is an attempt to capture the design principles and inner
working of the shm transport (see `tensorpipe/transport/shm`). Its
performance makes it an efficient alternative to IP based transports
for same-machine communication.

At the core of a transport implementation lies a listener, a
connection, and a context. Listeners accept connections. Contexts
create listeners and can connect to remote listeners.

## Concepts


### Ring buffers

Shared memory ring buffers are a core building block for the shm
transport. They are implemented with split control and data
sections. This means the data section can be fully aligned. The header
section stores a read/write transaction flag and the head and tail
offsets into the data section. Producers and consumers of the ring
buffer use atomic instructions to mutate this header depending on
their intent.

### File descriptors

The header and data segments of a shared memory ring buffer are
created as follows. First, a file is created in `/dev/shm` with the
`O_TMPFILE` flag. This means that anything written to the resulting
file is lost when the last file descriptor is closed, unless the file
is given a name. Because we never give this file a name, the segment
is automatically cleaned up when the last process that has its file
descriptor terminates.

Per above, creating a shared memory ring buffer yields 2 file
descriptors, one for the header segment and one for the data segment.
These file descriptors are shared over a Unix domain socket.

### The reactor

This is a TensorPipe specific component. It uses a shared memory ring
buffer to allow other processes to trigger functions. If a process wants
another process to trigger a function, it registers this function with
the reactor, and gets back a 32-bit token. Then, the file descriptors of
the reactor's ring buffer, as well as the token, are sent to another
process. The other process can now map the reactor ring buffer, and
trigger the registered function by writing the token to the ring buffer.

See [considerations](#considerations) below on why this was used.

### Unix domain sockets

Coordination between processes to bootstrap a connection that uses
shared memory ring buffers is implemented using Unix domain sockets.
The listening side of a connection binds and listens on an abstract
socket address. A typical Unix domain socket "address" is a filesystem
pathname. An abstract socket address, by contrast, is not visible on
any filesystem. They exist in a single abstract socket namespace
shared by all processes on the machine. Removing the filesystem
dependency means two things:

1. (+) It is not necessary to purge stale Unix domain socket files.
2. (-) These sockets don't have permissions, so any process that has
   its name can connect.

Read more about abstract domain sockets [here][1] and [here][2].

[1]: http://man7.org/linux/man-pages/man7/unix.7.html
[2]: https://utcc.utoronto.ca/~cks/space/blog/linux/SocketAbstractNamespace

Once processes have established a Unix domain socket, it is used to:

1. Pass the shared memory file descriptors to a peer process.
2. Signal peer termination (through eof on socket closure).
3. ... nothing else. All data moves through the ring buffers.

**Note:** abstract socket addresses are a Linux specific feature.

## Bringing it together

So, to establish one of these shared memory connections, we first
listen on some unique abstract socket address. This address must be
known to the process that wishes to connect. For a quick test we can
use a pre-shared address. Otherwise, we can generate a UUID and share
it with some out of band mechanism. The connecting process connects
and the listening process accepts. We have now established a Unix
domain socket and move on to the next step.

Each process creates a new shared memory ring buffer specifically for
this connection. We refer to this ring buffer as the _inbox_. We
expect each process to be pinned to a specific NUMA node and perform
the memory allocation in the same NUMA domain.

The file descriptors of the inbox, the file descriptors of the
reactor, and a token to trigger readability of the inbox, are shared
over the socket.

Each process receives file descriptors from their peer and initializes
the corresponding ring buffers. The peer's inbox is referred to as the
_outbox_. The token to trigger remote readability is referred to as
the _outbox trigger_.

The connection is now established! Writes are performed by writing
directly into the outbox and triggering the outbox trigger. The
trigger wakes up the peer's reactor and executes a function that
notifies the connection of readability. Subsequently, the connection
checks if there was a pending read operation, and processes it if so.

When either process destructs the connection, or crashes, the original
Unix domain socket is closed, which signals the peer process that it
shouldn't expect more writes to its inbox and can destruct the
connection as well.

## Considerations

A single process may have multiple connections. Therefore, it may have
multiple inbox ring buffers. One way to react to incoming writes is to
simply check if there are any bytes to read. This requires checking all
N inboxes for reads, which can become problematic if N gets large. To
better solve this multiplexing problem we initially used an
[`eventfd(2)`][eventfd] per inbox. This file descriptor was registered
with the existing [`epoll(7)`][epoll] loop and would trigger the
readability function when it became readable. To perform a write, the
peer process would first write to the outbox and then write to the
peer's eventfd.

[eventfd]: http://man7.org/linux/man-pages/man2/eventfd.2.html
[epoll]: http://man7.org/linux/man-pages/man7/epoll.7.html

A simple ping/pong performance benchmark using this approach, with both
processes pinned to the same NUMA node, showed a lower bound latency of
~12 microseconds. This seemed high for a pair of ring buffer writes, so
we explored alternatives, and came up with the reactor approach. Now,
the same benchmark runs with a lower bound latency of about ~1.7
microseconds, which is a 7x improvement over the `eventfd(2)`/`epoll(7)`
approach.


================================================
FILE: docs/thread_model.md
================================================
# TensorPipe's thread model

TensorPipe spawns multiple threads internally. This is a design
requirement as, for example, a single thread wouldn't manage to drive a
modern network interface card (NIC) at capacity and saturate its
bandwidth, even if it did nothing but write on the socket: multiple
threads writing in parallel to multiple sockets are the only way to
achieve that.

Moreover, the possibility of spawning new threads when needed allows
for a simpler architecture in the implementation of TensorPipe's
modular approach to backends (transports and channels): if one of these
backends needs to perform some heavy operation (a blocking syscall, an
event loop, ...) it can launch a dedicated thread for it rather than
having to schedule it on the user thread or on a shared thread pool,
thus having to "fit" the operation into some framework.

This heavy reliance on multi-threading poses of course challenges in
coordination and robustness. This document aims to outline the patterns
we've ended up adopting to have a structured and principled design
around this.

## Callbacks

TensorPipe uses callbacks to organize the control flow around
asynchronous and deferred execution. While this may be an anti-pattern
leading to so-called "spaghetti code" or "callback hell", we realized
that it was the only approach that would yield the performance we need.
Modern alternatives to callbacks (promises/futures, coroutines, ...) 
would have introduced an unacceptable overhead in some cases.

Nearly all operations in TensorPipe are non-blocking and are performed
asynchronously, in background, with their results notified through
callbacks. This includes the creation of pipes and connections (the
objects may still be performing initialization when they are given to
the user and, although operations can be performed on them, these will
be delayed until setup completes). And it also includes destruction,
which means that internal resources may not be immediately freed when a
user-facing object is deleted. The only synchronization point that
allows the user to wait for such cleanup to finish is the context's
`join` method. Some other methods that may occasionally wait are the
ones that return a value, for example the ones to retrieve addresses.

## Shared pointers

As soon as threads and callbacks enter the mix, race conditions start
to pop up. Among the first ones, there's the problem of ownership:
ideally we want a `unique_ptr`-style semantics, where each object has a
clear owner who controls its lifetime. However, when this owner asks
another thread to perform an operation on that object as part of a
callback, that callback also (temporarily) needs access to the object.
As there may be multiple operations with multiple callbacks at the same
time, transferring ownership isn't an option, and sharing it is the
only way to go. This however requires synchronization among the various
users: if the "real" user had a `unique_ptr` and gave raw pointers to
the callbacks, the real user may delete the object without the
callbacks noticing or having any way to stop/delay it. This would then
cause use-after-free errors. There must thus be a sort of "lock" that
prevents the object from being deleted while someone is working on it,
like a "semaphore" counting the users. It turns out a perfect tool for
the job is `shared_ptr`. Acquiring a lock on the object corresponds to
obtaining a `shared_ptr` instance, which increases the reference count.
The object will only be deleted when its refcount reaches zero, which
means all its users (the "real" ones and the callbacks) have stopped
using the object.

We have however solved a problem by creating an opposite one: a memory
leak. Imagine an object (say, a pipe) that is the "real" owner of
another one (say, a channel) from which it is expecting a callback, and
that callback captures a `shared_ptr` to the first object in its
closure. This is a reference cycle. It means that even if the "real"
owner of the first object relinquishes its `shared_ptr`, the objects
won't be destroyed until the callback fires (if ever). An easy solution
to this is to have callbacks only keep a `shared_ptr` when they are
running, not while they are waiting. Again, the standard library has
the perfect tool for the job: the `weak_ptr`, which will keep the
refcount unchanged but can be "locked" to obtain a real `shared_ptr`
when needed (a curious coincidence that the terminology aligns with ours).

So, in short: the real owner of an object keeps a `shared_ptr` to it,
it passes `weak_ptr`s to be stored in callbacks, and these are locked
back to `shared_ptr`s just before running the callbacks. (If locking
fails, the callback isn't run).

## Public objects vs private implementations

It turns out that what we said above isn't always true: in some cases
we may want a callback to keep the object alive until it has fired.
This happens because some callbacks are one half of a "contract"
regarding data ownership: throughout the API (at higher and lower
levels), `read`, `write`, `send` and `recv` methods take some data
(source or destination buffers), and by doing so the caller hands over
control of the data to the object. The way for the object to yield
ownership back to the caller is by invoking the callback. We must thus
ensure that these callbacks are always called. However, we must also
avoid calling them when we're not ready yet to give up access to the
data. For a more concrete example, consider the user trying to destroy
a pipe that has a pending write operation, while some other thread is
simultaneously performing a memory copy as part of that write
operation. If we invoke the write operation's callback before aborting
the memory copy we're giving the user the right to deallocate the
buffer, which may lead the other thread to segfault.

Here is what needs to happen: when a user deletes a pipe, all its
pending operations must be interrupted, which in turn also aborts the
lower level operations; the pipe's callbacks, however, must not be
fired and instead kept alive while waiting for the lower level
operations to wrap up, and only then they can be triggered. This shows
that a subset of the pipe, containing at least the callbacks, must
survive the destruction of the whole pipe. In other words, the lifetime
of the inner part must be detachable from that of the outer shell.

In order to do so, most public objects are just thin wrappers around a
single member field, which is just a pointer to an instance of a
private "implementation" (abbreviated as impl), which is where
everything happens. The impl is a `shared_ptr` so that its life cycle
can be detached and extended with respect to the one of the public
object. The callbacks that we must wait for in order to regain control
of some resource also capture a `shared_ptr`. This way we can still get
the "signal" from when the public object is deleted (and can start
terminating pending operations) but we're also able to keep the impl
around while waiting for the shutdown to complete.

## Locking

Objects can be accessed and worked on from many threads, from all
directions, above (user threads, higher up the stack) and below 
(low-level backend threads). To avoid race conditions on the internal
state of these object, we must have mutual exclusion between threads,
using locks. While it may be possible to have separate fine-grained
locks for different parts of some objects, in general it is safer
and easier to have one mutex per object, and use it to lock all
operations.

That's easily said, but it just as easily leads to deadlocks, which in
our experience come in two flavors:

- When an object (holding its own lock) calls an "upward" callback which
  (inline/serially) tries to perform an operation on that same object,
  which tries to acquire the same lock. This is a perfectly legitimate
  behavior, since all of our callbacks are "one-shot", that is, they
  "burn out" after they fire and thus must be immediately rearmed.

- When an object (holding its own lock) performs an operation on a
  lower level object, passing a callback to it, and this callback is
  called immediately (inline/serially) and tries to also acquire the
  lock of the first object. This typically happens when the lower level
  object is in an error state and can thus "shortcut" the operation and
  immediately trigger the callback instead of deferring it to a thread.

Mitigations for these problems are possible but none is universal and
they all have drawbacks. Examples are:

- When calling upward callbacks, extract one from the object onto the
  stack, put the object in a consistent state, release its lock and
  then call the callback. This works but there's a racing risk which
  would cause callbacks to not be called in their intended order.

- Have a dedicated thread from which to invoke callbacks. Therefore
  other threads, instead of triggering callbacks, push them to some
  queue that is consumed by this thread. This resembles the semi-future
  and executor pattern. We used to have such a pattern in place for
  calling the pipe callbacks but it was introducing an unacceptable
  latency overhead.

- The backends already typically have a thread they can defer callbacks
  to, and for the most part they already do. However having such a
  thread isn't necessarily a requirement for a transport, and such
  threads may not be running at all times (e.g., once a backend has
  been joined).

- We could replace regular locks with reentrant locks (also called
  recursive). This is typically considered bad practice, though, and
  when at some point we tried this we indeed hit problems.

The next section presents a more disciplined way of dealing with races.

## Event loops

A classic way of dealing with parallel I/O is event loops: repeatedly
polling a set of file descriptors for readability/writability (blocking
to wait for them to become ready), dealing with them, and repeating.
Syscalls to do this are `select`, `epoll`, and more. The `libuv`
library used by one of TensorPipe's transports is also based on an
event loop. Event loops are typically single-threaded, and they allow
one to "simulate" parallelism by multiplexing several threads onto a
single one, which works well when those threads would otherwise spend
most of their time doing blocking I/O.

The simplicity of event loops, their single-threaded safety and their
established effectiveness prompted us to make them a foundation of our
threading model.

If an object already has a thread to which it offloads some operations
(this is the case for most transports and some channels, but not the
pipe) then we defer all operations to it. And we really mean all of
them: all manipulation of the object (scheduling operations, querying
information, running callbacks) must be done from within that event
loop thread. All operations that are attempted on the object, either
from another thread or from within the event loop thread (for example,
by a callback in user code) are deferred, appended to a queue, and
dealt with at a later iteration of the loop. This guarantees that we'll
always have a single thread accessing such objects, thus ensuring
thread safety without even using any locks. Note that such design isn't
a requirement for transports, it's just the pattern that we've adopted
for all our current transports.

If, on the other hand, an object does not have access to a thread to
use as an event loop, we'll "borrow" the caller's thread and
temporarily use it as an event loop. We'll similarly have a queue of
tasks, and the thread will consume them one by one, until none are
left, at which point we'll stop occupying the thread and release it
back to the caller. If any new operation is attempted by another thread
while one of these temporary event loops is running, that operation is
added to the queue and thus deferred to the already-running event loop,
with the new thread immediately able to return to what it was doing.


================================================
FILE: setup.py
================================================
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import os
import subprocess
import sys
from pathlib import Path

from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext


class CMakeBuild(build_ext):
    """Custom ``build_ext`` command that delegates the build to CMake.

    The registered extensions carry no sources; instead, CMake is pointed
    at the repository root and configured to drop the resulting
    ``pytensorpipe`` module into the directory where setuptools expects
    the extension artifact.
    """

    def run(self):
        # Build every registered extension through CMake.
        for ext in self.extensions:
            self.build_extension(ext)

    def build_extension(self, ext):
        """Configure and build a single extension with CMake and make.

        Raises:
            subprocess.CalledProcessError: if the CMake configuration
                step or the make step exits with a non-zero status.
        """
        # Out-of-source build directory; exist_ok avoids the
        # check-then-create race of os.path.exists + os.makedirs.
        os.makedirs(self.build_temp, exist_ok=True)

        source_path = Path(__file__).parent.resolve()
        output_path = Path(self.get_ext_fullpath(ext.name)).parent.resolve()
        build_type = "Debug" if self.debug else "Release"

        cmake_cmd = [
            "cmake",
            f"{source_path}",
            f"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={output_path}",
            f"-DPYTHON_EXECUTABLE={sys.executable}",
            f"-DCMAKE_BUILD_TYPE={build_type}",
            # NOTE(review): the compiler is hard-coded, so this build fails on
            # hosts without clang-6.0 — consider honoring $CC/$CXX instead.
            "-DCMAKE_C_COMPILER=clang-6.0",
            "-DCMAKE_CXX_COMPILER=clang++-6.0",
            "-DCMAKE_POSITION_INDEPENDENT_CODE=true",
            "-DTP_BUILD_PYTHON=true",
        ]

        # Forward any TP_* environment variables as CMake cache options so
        # that backends can be toggled from the environment.
        for opt, value in os.environ.items():
            if opt.startswith("TP_"):
                cmake_cmd.append(f"-D{opt}={value}")

        make_cmd = ["make", "-j", "pytensorpipe"]

        subprocess.check_call(cmake_cmd, cwd=self.build_temp)
        subprocess.check_call(make_cmd, cwd=self.build_temp)


# Register a single, source-less extension: compilation is fully delegated
# to CMake through the CMakeBuild command, which places the built
# `pytensorpipe` module where setuptools expects it.
setup(
    name="tensorpipe",
    version="0.0.0",
    author="Facebook AI Research",
    ext_modules=[Extension("pytensorpipe", sources=[])],
    cmdclass={"build_ext": CMakeBuild},
    zip_safe=False,  # native extension cannot be imported from a zip
)


================================================
FILE: tensorpipe/.clang-format
================================================
---
AccessModifierOffset: -1
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: true
AlignOperands:   false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: false
BinPackParameters: false
BraceWrapping:
  AfterClass:      false
  AfterControlStatement: false
  AfterEnum:       false
  AfterFunction:   false
  AfterNamespace:  false
  AfterObjCDeclaration: false
  AfterStruct:     false
  AfterUnion:      false
  BeforeCatch:     false
  BeforeElse:      false
  IndentBraces:    false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: false
ColumnLimit:     80
CommentPragmas:  '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat:   false
ForEachMacros:   [ FOR_EACH_RANGE, FOR_EACH, ]
IncludeCategories:
  - Regex:           '^<.*\.h(pp)?>'
    Priority:        1
  - Regex:           '^<.*'
    Priority:        2
  - Regex:           '.*'
    Priority:        3
IndentCaseLabels: true
IndentWidth:     2
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd:   ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 2000000
PointerAlignment: Left
ReflowComments:  true
SortIncludes:    true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles:  false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard:        Cpp11
TabWidth:        8
UseTab:          Never
...


================================================
FILE: tensorpipe/.clang-tidy
================================================
---
InheritParentConfig: true
Checks: '
readability-identifier-naming,
readability-inconsistent-declaration-parameter-name,
readability-named-parameter,
'
FormatStyle: file
CheckOptions:
# Names of classes (and structs?)
- { key: readability-identifier-naming.ClassCase, value: CamelCase }
# Names of enums and enum classes
- { key: readability-identifier-naming.EnumCase, value: CamelCase }
# Names of members and methods of classes (and structs?)
- { key: readability-identifier-naming.MemberCase, value: camelBack }
- { key: readability-identifier-naming.PrivateMemberCase, value: camelBack }
- { key: readability-identifier-naming.PrivateMemberSuffix, value: '_' }
- { key: readability-identifier-naming.ProtectedMemberCase, value: camelBack }
- { key: readability-identifier-naming.ProtectedMemberSuffix, value: '_' }
- { key: readability-identifier-naming.MethodCase, value: camelBack }
# Names of parameters and local variables
- { key: readability-identifier-naming.LocalVariableCase, value: camelBack }
- { key: readability-identifier-naming.ParameterCase, value: camelBack }
# Names of constants
- { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase }
- { key: readability-identifier-naming.GlobalConstantPrefix, value: 'k' }
# FIXME scoped enums are only supported in clang-tidy 12.
# Names of (non-class) enum members
# - { key: readability-identifier-naming.EnumConstantCase, value: UPPER_CASE }
# Names of enum class members
# - { key: readability-identifier-naming.ScopedEnumConstantCase, value: CamelCase }
# - { key: readability-identifier-naming.ScopedEnumConstantPrefix, value: 'k' }
# Names of template parameters
- { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase }
# Names of global functions
- { key: readability-identifier-naming.FunctionCase, value: camelBack }
# Names of namespaces
- { key: readability-identifier-naming.NamespaceCase, value: lower_case }
...


================================================
FILE: tensorpipe/CMakeLists.txt
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# TP_SRCS is the list of source files that we need to build libtensorpipe.
set(TP_SRCS)

# TP_PUBLIC_HDRS is the list of public header files that we need to install.
set(TP_PUBLIC_HDRS)

# TP_LINK_LIBRARIES is list of dependent libraries to be linked
set(TP_LINK_LIBRARIES)

# TP_INCLUDE_DIRS is list of include path to be used
set(TP_INCLUDE_DIRS)

list(APPEND TP_SRCS
  channel/error.cc
  channel/helpers.cc
  common/address.cc
  common/allocator.cc
  common/error.cc
  common/fd.cc
  common/socket.cc
  common/system.cc
  core/context.cc
  core/context_impl.cc
  core/error.cc
  core/listener.cc
  core/listener_impl.cc
  core/pipe.cc
  core/pipe_impl.cc
  transport/error.cc)

list(APPEND TP_PUBLIC_HDRS
  tensorpipe.h
  channel/context.h
  channel/error.h
  common/buffer.h
  common/cpu_buffer.h
  common/device.h
  common/error.h
  common/optional.h
  core/context.h
  core/error.h
  core/listener.h
  core/message.h
  core/pipe.h
  transport/context.h
  transport/error.h)

list(APPEND TP_INCLUDE_DIRS
  $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
  $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>
  $<INSTALL_INTERFACE:${TP_INSTALL_INCLUDEDIR}>)


## Channels

### basic

list(APPEND TP_SRCS
  channel/basic/channel_impl.cc
  channel/basic/context_impl.cc
  channel/basic/factory.cc)
list(APPEND TP_PUBLIC_HDRS
  channel/basic/factory.h)

### xth

list(APPEND TP_SRCS
  channel/xth/channel_impl.cc
  channel/xth/context_impl.cc
  channel/xth/factory.cc)
list(APPEND TP_PUBLIC_HDRS
  channel/xth/factory.h)

### cma

tp_conditional_backend(
  TP_ENABLE_CMA "Enable cross-memory attach channel" "LINUX")
if(TP_ENABLE_CMA)
  list(APPEND TP_SRCS
    channel/cma/channel_impl.cc
    channel/cma/context_impl.cc
    channel/cma/factory.cc)
  list(APPEND TP_PUBLIC_HDRS
    channel/cma/factory.h)
  set(TENSORPIPE_HAS_CMA_CHANNEL 1)
endif()

### mpt

list(APPEND TP_SRCS
  channel/mpt/channel_impl.cc
  channel/mpt/context_impl.cc
  channel/mpt/factory.cc)
list(APPEND TP_PUBLIC_HDRS
  channel/mpt/factory.h)

## Transports

### uv

list(APPEND TP_SRCS
  transport/uv/connection_impl.cc
  transport/uv/context_impl.cc
  transport/uv/error.cc
  transport/uv/factory.cc
  transport/uv/listener_impl.cc
  transport/uv/loop.cc
  transport/uv/sockaddr.cc
  transport/uv/utility.cc)
list(APPEND TP_PUBLIC_HDRS
  transport/uv/error.h
  transport/uv/factory.h
  transport/uv/utility.h)

# Add uv package
find_package(uv REQUIRED)
list(APPEND TP_LINK_LIBRARIES uv::uv)

### shm

tp_conditional_backend(
  TP_ENABLE_SHM "Enable shared-memory transport" "LINUX")
if(TP_ENABLE_SHM)
  list(APPEND TP_SRCS
    common/epoll_loop.cc
    common/shm_segment.cc
    transport/shm/connection_impl.cc
    transport/shm/context_impl.cc
    transport/shm/factory.cc
    transport/shm/listener_impl.cc
    transport/shm/reactor.cc
    transport/shm/sockaddr.cc)
  list(APPEND TP_PUBLIC_HDRS
    transport/shm/factory.h)
  set(TENSORPIPE_HAS_SHM_TRANSPORT 1)
endif()

### ibv

tp_conditional_backend(
  TP_ENABLE_IBV "Enable InfiniBand transport" "LINUX")
if(TP_ENABLE_IBV)
  list(APPEND TP_SRCS
    common/epoll_loop.cc
    common/ibv.cc
    transport/ibv/connection_impl.cc
    transport/ibv/context_impl.cc
    transport/ibv/error.cc
    transport/ibv/factory.cc
    transport/ibv/listener_impl.cc
    transport/ibv/reactor.cc
    transport/ibv/sockaddr.cc
    transport/ibv/utility.cc)
  list(APPEND TP_PUBLIC_HDRS
    transport/ibv/error.h
    transport/ibv/factory.h
    transport/ibv/utility.h)
  set(TENSORPIPE_HAS_IBV_TRANSPORT 1)
endif()


## MAC OS specific library deps

if(APPLE)
  find_library(CF CoreFoundation)
  find_library(IOKIT IOKit)
  list(APPEND TP_LINK_LIBRARIES ${CF} ${IOKIT})
endif()


## Config

configure_file(config.h.in config.h)


## Libnop

# We should keep libnop headers private as they should not be exposed to downstream users,
# but they're currently transitively included by tensorpipe/transport/connection.h (which
# is still unclear whether it should be a public or private header).
list(APPEND TP_INCLUDE_DIRS $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/libnop/include>)


## Target

# Add the tensorpipe library target
add_library(tensorpipe ${TP_STATIC_OR_SHARED} ${TP_SRCS})

# Set target properties
if(BUILD_SHARED_LIBS)
  set_target_properties(tensorpipe PROPERTIES POSITION_INDEPENDENT_CODE 1)
endif()

# Link the dependent libraries privately (they are an implementation detail)
# and expose the include directories publicly to downstream users.
target_link_libraries(tensorpipe PRIVATE ${TP_LINK_LIBRARIES})
target_include_directories(tensorpipe PUBLIC ${TP_INCLUDE_DIRS})


## Install

install(TARGETS tensorpipe
        EXPORT TensorpipeTargets
        LIBRARY DESTINATION ${TP_INSTALL_LIBDIR}
        ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR})

foreach(_header_file ${TP_PUBLIC_HDRS})
  get_filename_component(_TP_HEADER_SUBDIR "${_header_file}" DIRECTORY)
  install(FILES ${_header_file}
          DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe/${_TP_HEADER_SUBDIR})
endforeach()

install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h
        DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe)


## CUDA

if(TP_USE_CUDA)
  # TP_CUDA_SRCS is the list of source files that we need to build libtensorpipe_cuda.
  set(TP_CUDA_SRCS)

  # TP_CUDA_PUBLIC_HDRS is the list of public header files that we need to install.
  set(TP_CUDA_PUBLIC_HDRS)

  # TP_CUDA_LINK_LIBRARIES is the list of dependent libraries to be linked.
  set(TP_CUDA_LINK_LIBRARIES)

  # TP_CUDA_INCLUDE_DIRS is the list of include paths to be used.
  set(TP_CUDA_INCLUDE_DIRS)

  find_package(CUDA REQUIRED)
  list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES})
  list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})

  list(APPEND TP_CUDA_SRCS
    common/cuda_buffer.cc)
  list(APPEND TP_CUDA_PUBLIC_HDRS
    tensorpipe_cuda.h
    common/cuda_buffer.h)

  ### cuda_xth

  list(APPEND TP_CUDA_SRCS
    channel/cuda_xth/channel_impl.cc
    channel/cuda_xth/context_impl.cc
    channel/cuda_xth/factory.cc)
  list(APPEND TP_CUDA_PUBLIC_HDRS
    channel/cuda_xth/factory.h)

  ### cuda_basic

  list(APPEND TP_CUDA_SRCS
    channel/cuda_basic/channel_impl.cc
    channel/cuda_basic/context_impl.cc
    channel/cuda_basic/factory.cc
    common/cuda_loop.cc)
  list(APPEND TP_CUDA_PUBLIC_HDRS
    channel/cuda_basic/factory.h)

  ### cuda_ipc

  tp_conditional_backend(
    TP_ENABLE_CUDA_IPC "Enable CUDA inter-process communication channel" "TP_USE_CUDA")
  if(TP_ENABLE_CUDA_IPC)
    list(APPEND TP_CUDA_SRCS
      channel/cuda_ipc/channel_impl.cc
      channel/cuda_ipc/context_impl.cc
      channel/cuda_ipc/factory.cc)
    list(APPEND TP_CUDA_PUBLIC_HDRS
      channel/cuda_ipc/factory.h)
    set(TENSORPIPE_HAS_CUDA_IPC_CHANNEL 1)
  endif()

  ### cuda_gdr

  tp_conditional_backend(
    TP_ENABLE_CUDA_GDR "Enable CUDA GpuDirect (InfiniBand) channel" "LINUX")
  if(TP_ENABLE_CUDA_GDR)
    list(APPEND TP_CUDA_SRCS
      common/ibv.cc
      channel/cuda_gdr/channel_impl.cc
      channel/cuda_gdr/context_impl.cc
      channel/cuda_gdr/factory.cc)
    list(APPEND TP_CUDA_PUBLIC_HDRS
      channel/cuda_gdr/error.h
      channel/cuda_gdr/factory.h)
    set(TENSORPIPE_HAS_CUDA_GDR_CHANNEL 1)
  endif()

  configure_file(config_cuda.h.in config_cuda.h)

  add_library(tensorpipe_cuda ${TP_STATIC_OR_SHARED} ${TP_CUDA_SRCS})

  if(BUILD_SHARED_LIBS)
    set_target_properties(tensorpipe_cuda PROPERTIES POSITION_INDEPENDENT_CODE 1)
  endif()

  target_link_libraries(tensorpipe_cuda PUBLIC tensorpipe)
  target_link_libraries(tensorpipe_cuda PRIVATE ${TP_CUDA_LINK_LIBRARIES})
  target_include_directories(tensorpipe_cuda PUBLIC ${TP_CUDA_INCLUDE_DIRS})

  install(TARGETS tensorpipe_cuda
          EXPORT TensorpipeTargets
          LIBRARY DESTINATION ${TP_INSTALL_LIBDIR}
          ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR})

  foreach(_header_file ${TP_CUDA_PUBLIC_HDRS})
    get_filename_component(_TP_HEADER_SUBDIR "${_header_file}" DIRECTORY)
    install(FILES ${_header_file}
            DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe/${_TP_HEADER_SUBDIR})
  endforeach()

  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config_cuda.h
          DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe)

endif()


## Python bindings

if(TP_BUILD_PYTHON)
  add_subdirectory(python)
endif()


## Benchmarks

if (TP_BUILD_BENCHMARK)
  add_subdirectory(benchmark)
endif()


## Misc tools

if (TP_BUILD_MISC)
  add_subdirectory(misc)
endif()


## Tests

if(TP_BUILD_TESTING)
  add_subdirectory(test)
endif()


================================================
FILE: tensorpipe/benchmark/CMakeLists.txt
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# TODO: Make those separate CMake projects.

# Transport-level ping-pong benchmark; needs only the core library.
add_executable(benchmark_transport benchmark_transport.cc options.cc transport_registry.cc)
target_link_libraries(benchmark_transport PRIVATE tensorpipe)

# NOTE(review): benchmark_pipe links tensorpipe_cuda unconditionally, but that
# target is only defined when TP_USE_CUDA is enabled — confirm this
# subdirectory is only added in CUDA-enabled builds.
add_executable(benchmark_pipe benchmark_pipe.cc options.cc transport_registry.cc channel_registry.cc)
target_link_libraries(benchmark_pipe PRIVATE tensorpipe tensorpipe_cuda)


================================================
FILE: tensorpipe/benchmark/benchmark_pipe.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <cstring>

#include <future>

#include <tensorpipe/benchmark/channel_registry.h>
#include <tensorpipe/benchmark/measurements.h>
#include <tensorpipe/benchmark/options.h>
#include <tensorpipe/benchmark/transport_registry.h>
#include <tensorpipe/common/cpu_buffer.h>
#include <tensorpipe/common/cuda.h>
#include <tensorpipe/common/cuda_buffer.h>
#include <tensorpipe/common/defs.h>
#include <tensorpipe/core/context.h>
#include <tensorpipe/core/listener.h>
#include <tensorpipe/core/pipe.h>

// We might sometimes want to run this benchmark using NCCL instead of
// TensorPipe. We don't want to include NCCL as a submodule and deal with the
// build issues. So we've prepared the code and left it around, but disabled it.
#if USE_NCCL
#include <nccl.h>

#define TP_NCCL_CHECK(op)                   \
  {                                         \
    ncclResult_t res = (op);                \
    TP_THROW_ASSERT_IF(res != ncclSuccess); \
  }

// Deleter so that ncclComm_t handles can be owned by a unique_ptr;
// destroys the communicator when the owning pointer is released.
struct NcclCommDeleter {
  void operator()(ncclComm_t comm) {
    TP_NCCL_CHECK(ncclCommDestroy(comm));
  }
};

using NcclComm =
    std::unique_ptr<std::remove_pointer_t<ncclComm_t>, NcclCommDeleter>;

// Initializes the NCCL communicator for this rank and wraps it in an
// owning handle that destroys it automatically.
static NcclComm createNcclComm(int rank, int worldSize, ncclUniqueId uniqueId) {
  ncclComm_t comm;
  TP_NCCL_CHECK(ncclCommInitRank(&comm, worldSize, uniqueId, rank));
  return NcclComm(comm, NcclCommDeleter{});
}
#endif // USE_NCCL

using namespace tensorpipe;
using namespace tensorpipe::benchmark;

static constexpr int kNumWarmUpRounds = 5;

using Payload = std::unique_ptr<uint8_t[]>;
using CpuTensor = std::unique_ptr<uint8_t[]>;

// Deleter for device allocations owned by a unique_ptr; releases the
// memory with cudaFree.
struct CudaMemoryDeleter {
  void operator()(void* ptr) {
    TP_CUDA_CHECK(cudaFree(ptr));
  }
};

// Deleter for CUDA streams owned by a unique_ptr; destroys the stream.
struct CudaStreamDeleter {
  void operator()(cudaStream_t stream) {
    TP_CUDA_CHECK(cudaStreamDestroy(stream));
  }
};

using CudaTensor = std::unique_ptr<uint8_t[], CudaMemoryDeleter>;
using CudaStream =
    std::unique_ptr<std::remove_pointer_t<cudaStream_t>, CudaStreamDeleter>;

// Buffers and expected contents for one benchmark configuration: the
// "expected" members hold the reference data used for validation, while
// the "temporary" members are the scratch buffers incoming data lands in.
struct Data {
  // Number of payloads per message and the size (in bytes) of each.
  size_t numPayloads;
  size_t payloadSize;
  std::vector<Payload> expectedPayload;
  std::vector<std::string> expectedPayloadMetadata;
  // Scratch buffers that incoming payloads are received into.
  std::vector<Payload> temporaryPayload;

  // Number of tensors per message, the size (in bytes) of each, and
  // whether they live on CPU or CUDA.
  size_t numTensors;
  size_t tensorSize;
  TensorType tensorType;
  std::vector<CpuTensor> expectedCpuTensor;
  std::vector<CudaTensor> expectedCudaTensor;
  std::vector<std::string> expectedTensorMetadata;
  // Scratch buffers that incoming tensors are received into.
  std::vector<CpuTensor> temporaryCpuTensor;
  std::vector<CudaTensor> temporaryCudaTensor;
  // Stream used for CUDA tensor transfers.
  CudaStream cudaStream;
  // NOTE(review): presumably the number of iterations between CUDA stream
  // syncs — confirm against the benchmark loop.
  size_t cudaSyncPeriod;

  // Top-level message metadata expected from the peer.
  std::string expectedMetadata;

#if USE_NCCL
  NcclComm ncclComm;
#endif // USE_NCCL
};

// Pairs the two timing series collected when benchmarking CUDA transfers.
struct MultiDeviceMeasurements {
  // The CPU time to do each ping-pong.
  Measurements cpu;
  // The CPU time of N iterations, including a final CUDA stream sync.
  Measurements cuda;
};

// Prints a header row followed by summary statistics (average and selected
// percentiles, in microseconds) of the collected latency samples to stderr.
// Note: sorts `measurements` in place (needed for the percentiles).
static void printMeasurements(Measurements& measurements, size_t dataLen) {
  measurements.sort();
  fprintf(
      stderr,
      "%-15s %-15s %-12s %-7s %-7s %-7s %-7s\n",
      "chunk-size",
      "# ping-pong",
      "avg (usec)",
      "p50",
      "p75",
      "p90",
      "p95");
  // Use %zu for size_t values: %lu is only correct on platforms where
  // size_t == unsigned long (not true on e.g. LLP64 systems).
  fprintf(
      stderr,
      "%-15zu %-15zu %-12.3f %-7.3f %-7.3f %-7.3f %-7.3f\n",
      dataLen,
      static_cast<size_t>(measurements.size()),
      measurements.sum().count() / (float)measurements.size() / 1000.0,
      measurements.percentile(0.50).count() / 1000.0,
      measurements.percentile(0.75).count() / 1000.0,
      measurements.percentile(0.90).count() / 1000.0,
      measurements.percentile(0.95).count() / 1000.0);
}

// Prints the CPU-time table followed by the CUDA-sync-inclusive table,
// both for the same chunk size.
static void printMultiDeviceMeasurements(
    MultiDeviceMeasurements& measurements,
    size_t dataLen) {
  printMeasurements(measurements.cpu, dataLen);
  printMeasurements(measurements.cuda, dataLen);
}

// Allocates a zero-initialized host buffer of `size` bytes.
static std::unique_ptr<uint8_t[]> createEmptyCpuData(size_t size) {
  // Value-initialization (the trailing parentheses) zeroes the array,
  // matching std::make_unique's behavior for array types.
  return std::unique_ptr<uint8_t[]>(new uint8_t[size]());
}

// Allocates a host buffer of `size` bytes filled with a deterministic
// pattern ((i >> 8) ^ (i & 0xff)) that peers use to validate received data.
static std::unique_ptr<uint8_t[]> createFullCpuData(size_t size) {
  std::unique_ptr<uint8_t[]> buffer = createEmptyCpuData(size);
  size_t idx = 0;
  while (idx < size) {
    buffer[idx] = static_cast<uint8_t>((idx >> 8) ^ (idx & 0xff));
    ++idx;
  }
  return buffer;
}

// Allocates an uninitialized device buffer of `size` bytes, owned by a
// handle that frees it via cudaFree.
static CudaTensor createEmptyCudaData(size_t size) {
  uint8_t* ptr;
  TP_CUDA_CHECK(cudaMalloc(&ptr, size));
  return CudaTensor(ptr);
}

// Allocates a device buffer of `size` bytes and fills it with the same
// validation pattern as createFullCpuData, via a host-to-device copy.
static CudaTensor createFullCudaData(size_t size) {
  uint8_t* ptr;
  TP_CUDA_CHECK(cudaMalloc(&ptr, size));
  CpuTensor data = createFullCpuData(size);
  TP_CUDA_CHECK(cudaMemcpy(ptr, data.get(), size, cudaMemcpyHostToDevice));
  return CudaTensor(ptr);
}

// Creates a CUDA stream with the cudaStreamNonBlocking flag, wrapped in
// an owning handle that destroys it automatically.
static CudaStream createCudaStream() {
  cudaStream_t stream;
  TP_CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  return CudaStream(stream);
}

// Server side of the pipe benchmark: receive a fully-described message into
// preallocated temporary buffers, validate it against the expected fixed
// pattern, and echo it back. The write-completion callback re-enters this
// function until all warm-up and measured round trips are done, then
// fulfills `doneProm`.
//
// The reference parameters are owned by runServer(), which blocks on
// `doneProm` until this chain completes, so the references stay valid for
// the whole asynchronous chain.
static void serverPongPingNonBlock(
    std::shared_ptr<Pipe> pipe,
    int& numWarmUps,
    int& numRoundTrips,
    std::promise<void>& doneProm,
    Data& data,
    Measurements& measurements) {
#if USE_NCCL
  // NCCL mode: run every iteration as a synchronous ncclRecv/ncclSend pair
  // on a single stream and return early. Note the unconditional `return`
  // below: the tensorpipe path is dead code when USE_NCCL is set.
  for (int iterIdx = 0; iterIdx < numWarmUps + numRoundTrips; iterIdx++) {
    // TODO Handle multiple tensors.
    TP_NCCL_CHECK(ncclRecv(
        data.temporaryCudaTensor[0].get(),
        data.tensorSize,
        ncclInt8,
        1,
        data.ncclComm.get(),
        data.cudaStream.get()));
    TP_NCCL_CHECK(ncclSend(
        data.temporaryCudaTensor[0].get(),
        data.tensorSize,
        ncclInt8,
        1,
        data.ncclComm.get(),
        data.cudaStream.get()));
  }
  doneProm.set_value();
  return;
#endif // USE_NCCL
  pipe->readDescriptor(
      [pipe, &numWarmUps, &numRoundTrips, &doneProm, &data, &measurements](
          const Error& error, Descriptor descriptor) {
        TP_THROW_ASSERT_IF(error) << error.what();
        // Build an Allocation that points every incoming payload/tensor at
        // the preallocated temporary buffers, checking the descriptor's
        // metadata and lengths against the expected values along the way.
        Allocation allocation;
        TP_DCHECK_EQ(descriptor.metadata, data.expectedMetadata);
        if (data.payloadSize > 0) {
          TP_DCHECK_EQ(descriptor.payloads.size(), data.numPayloads);
          allocation.payloads.resize(data.numPayloads);
          for (size_t payloadIdx = 0; payloadIdx < data.numPayloads;
               payloadIdx++) {
            TP_DCHECK_EQ(
                descriptor.payloads[payloadIdx].metadata,
                data.expectedPayloadMetadata[payloadIdx]);
            TP_DCHECK_EQ(
                descriptor.payloads[payloadIdx].length, data.payloadSize);
            allocation.payloads[payloadIdx].data =
                data.temporaryPayload[payloadIdx].get();
          }
        } else {
          TP_DCHECK_EQ(descriptor.payloads.size(), 0);
        }
        if (data.tensorSize > 0) {
          TP_DCHECK_EQ(descriptor.tensors.size(), data.numTensors);
          allocation.tensors.resize(data.numTensors);
          for (size_t tensorIdx = 0; tensorIdx < data.numTensors; tensorIdx++) {
            TP_DCHECK_EQ(
                descriptor.tensors[tensorIdx].metadata,
                data.expectedTensorMetadata[tensorIdx]);
            TP_DCHECK_EQ(descriptor.tensors[tensorIdx].length, data.tensorSize);
            if (data.tensorType == TensorType::kCpu) {
              allocation.tensors[tensorIdx].buffer = CpuBuffer{
                  .ptr = data.temporaryCpuTensor[tensorIdx].get(),
              };
            } else if (data.tensorType == TensorType::kCuda) {
              allocation.tensors[tensorIdx].buffer = CudaBuffer{
                  .ptr = data.temporaryCudaTensor[tensorIdx].get(),
                  .stream = data.cudaStream.get(),
              };
            } else {
              TP_THROW_ASSERT() << "Unknown tensor type";
            }
          }
        } else {
          TP_DCHECK_EQ(descriptor.tensors.size(), 0);
        }

        // Receive the message body into the buffers chosen above.
        pipe->read(
            allocation,
            [pipe,
             &numWarmUps,
             &numRoundTrips,
             &doneProm,
             &data,
             &measurements,
             descriptor{std::move(descriptor)},
             allocation](const Error& error) {
              TP_THROW_ASSERT_IF(error) << error.what();

              // Validate the received contents and build the echo message.
              // Payloads are echoed from the `expectedPayload` buffers;
              // tensors reuse the buffers the data was just read into.
              Message message;
              if (data.payloadSize > 0) {
                TP_DCHECK_EQ(allocation.payloads.size(), data.numPayloads);
                message.payloads.resize(data.numPayloads);
                for (size_t payloadIdx = 0; payloadIdx < data.numPayloads;
                     payloadIdx++) {
                  TP_DCHECK_EQ(
                      descriptor.payloads[payloadIdx].length, data.payloadSize);
                  TP_DCHECK_EQ(
                      memcmp(
                          allocation.payloads[payloadIdx].data,
                          data.expectedPayload[payloadIdx].get(),
                          descriptor.payloads[payloadIdx].length),
                      0);
                  message.payloads[payloadIdx] = {
                      .data = data.expectedPayload[payloadIdx].get(),
                      .length = descriptor.payloads[payloadIdx].length,
                  };
                }
              } else {
                TP_DCHECK_EQ(allocation.payloads.size(), 0);
              }
              if (data.tensorSize > 0) {
                TP_DCHECK_EQ(allocation.tensors.size(), data.numTensors);
                message.tensors.resize(data.numTensors);
                for (size_t tensorIdx = 0; tensorIdx < data.numTensors;
                     tensorIdx++) {
                  TP_DCHECK_EQ(
                      descriptor.tensors[tensorIdx].length, data.tensorSize);
                  if (data.tensorType == TensorType::kCpu) {
                    TP_DCHECK_EQ(
                        memcmp(
                            allocation.tensors[tensorIdx]
                                .buffer.unwrap<CpuBuffer>()
                                .ptr,
                            data.expectedCpuTensor[tensorIdx].get(),
                            descriptor.tensors[tensorIdx].length),
                        0);
                  } else if (data.tensorType == TensorType::kCuda) {
                    // No (easy) way to do a memcmp with CUDA, I believe...
                  } else {
                    TP_THROW_ASSERT() << "Unknown tensor type";
                  }
                  // Echo the tensor back to where it came from.
                  message.tensors[tensorIdx] = {
                      .buffer = allocation.tensors[tensorIdx].buffer,
                      .length = descriptor.tensors[tensorIdx].length,
                      .targetDevice =
                          descriptor.tensors[tensorIdx].sourceDevice,
                  };
                }
              } else {
                TP_DCHECK_EQ(allocation.tensors.size(), 0);
              }

              // Send the echo, then either recurse for the next round trip
              // or signal completion. Warm-up iterations are consumed
              // before measured ones.
              pipe->write(
                  std::move(message),
                  [pipe,
                   &numWarmUps,
                   &numRoundTrips,
                   &doneProm,
                   &data,
                   &measurements](const Error& error) {
                    TP_THROW_ASSERT_IF(error) << error.what();
                    if (numWarmUps > 0) {
                      numWarmUps -= 1;
                    } else {
                      numRoundTrips -= 1;
                    }
                    if (numRoundTrips > 0) {
                      serverPongPingNonBlock(
                          pipe,
                          numWarmUps,
                          numRoundTrips,
                          doneProm,
                          data,
                          measurements);
                    } else {
                      doneProm.set_value();
                    }
                  });
            });
      });
}

// Start with receiving ping
static void runServer(const Options& options) {
  std::string addr = options.address;
  int numWarmUps = kNumWarmUpRounds;
  int numRoundTrips = options.numRoundTrips;

  Data data;
  data.numPayloads = options.numPayloads;
  data.payloadSize = options.payloadSize;
  for (size_t payloadIdx = 0; payloadIdx < options.numPayloads; payloadIdx++) {
    data.expectedPayload.push_back(createFullCpuData(options.payloadSize));
    data.expectedPayloadMetadata.push_back(
        std::string(options.metadataSize, 0x42));
    data.temporaryPayload.push_back(createEmptyCpuData(options.payloadSize));
  }
  data.numTensors = options.numTensors;
  data.tensorSize = options.tensorSize;
  data.tensorType = options.tensorType;
  for (size_t tensorIdx = 0; tensorIdx < options.numTensors; tensorIdx++) {
    data.expectedTensorMetadata.push_back(
        std::string(options.metadataSize, 0x42));
    if (options.tensorType == TensorType::kCpu) {
      data.expectedCpuTensor.push_back(createFullCpuData(options.tensorSize));
      data.temporaryCpuTensor.push_back(createEmptyCpuData(options.tensorSize));
    } else if (options.tensorType == TensorType::kCuda) {
      data.expectedCudaTensor.push_back(createFullCudaData(options.tensorSize));
      data.temporaryCudaTensor.push_back(
          createEmptyCudaData(options.tensorSize));
      data.cudaStream = createCudaStream();
    } else {
      TP_THROW_ASSERT() << "Unknown tensor type";
    }
  }
  data.cudaSyncPeriod = options.cudaSyncPeriod;
  data.expectedMetadata = std::string(options.metadataSize, 0x42);

  Measurements measurements;
  measurements.reserve(options.numRoundTrips);

  std::shared_ptr<Context> context = std::make_shared<Context>();
  auto transportContext =
      TensorpipeTransportRegistry().create(options.transport);
  validateTransportContext(transportContext);
  context->registerTransport(0, options.transport, transportContext);

  auto channelContext = TensorpipeChannelRegistry().create(options.channel);
  validateChannelContext(channelContext);
  context->registerChannel(0, options.channel, channelContext);

  std::promise<std::shared_ptr<Pipe>> pipeProm;
  std::shared_ptr<Listener> listener = context->listen({addr});
  listener->accept([&](const Error& error, std::shared_ptr<Pipe> pipe) {
    TP_THROW_ASSERT_IF(error) << error.what();
    pipeProm.set_value(std::move(pipe));
  });
  std::shared_ptr<Pipe> pipe = pipeProm.get_future().get();

#if USE_NCCL
  std::promise<ncclUniqueId> uniqueIdProm;
  pipe->readDescriptor([&](const Error& error, Descriptor descriptor) {
    TP_THROW_ASSERT_IF(error) << error.what();
    uniqueIdProm.set_value(
        *reinterpret_cast<const ncclUniqueId*>(descriptor.metadata.c_str()));
  });
  ncclUniqueId uniqueId = uniqueIdProm.get_future().get();

  data.ncclComm = createNcclComm(/*rank=*/0, /*worldSize=*/2, uniqueId);
#endif

  std::promise<void> doneProm;
  serverPongPingNonBlock(
      std::move(pipe), numWarmUps, numRoundTrips, doneProm, data, measurements);

  doneProm.get_future().get();
  listener.reset();
  context->join();
}

// Client side of the pipe benchmark: send a message built from the expected
// (pattern-filled) buffers, receive the server's echo into the temporary
// buffers, and validate it. The final read callback re-enters this function
// until all warm-up and measured round trips are done, then prints the
// statistics and fulfills `doneProm`.
//
// Reference parameters are owned by runClient(), which blocks on
// `doneProm`, so they outlive the whole asynchronous chain.
static void clientPingPongNonBlock(
    std::shared_ptr<Pipe> pipe,
    int& numWarmUps,
    int& numRoundTrips,
    std::promise<void>& doneProm,
    Data& data,
    MultiDeviceMeasurements& measurements) {
#if USE_NCCL
  // NCCL mode: run every iteration synchronously here and return early;
  // the tensorpipe path below is dead code when USE_NCCL is set. The CPU
  // timer brackets each send/recv pair; the CUDA timer spans
  // `cudaSyncPeriod` iterations and is closed after a stream sync so it
  // captures actual device completion.
  for (int iterIdx = 0; iterIdx < numWarmUps + numRoundTrips; iterIdx++) {
    if (iterIdx >= numWarmUps) {
      measurements.cpu.markStart();
      if ((iterIdx - numWarmUps) % data.cudaSyncPeriod == 0) {
        measurements.cuda.markStart();
      }
    }
    TP_NCCL_CHECK(ncclSend(
        data.expectedCudaTensor[0].get(),
        data.tensorSize,
        ncclInt8,
        0,
        data.ncclComm.get(),
        data.cudaStream.get()));
    TP_NCCL_CHECK(ncclRecv(
        data.temporaryCudaTensor[0].get(),
        data.tensorSize,
        ncclInt8,
        0,
        data.ncclComm.get(),
        data.cudaStream.get()));
    if (iterIdx >= numWarmUps) {
      measurements.cpu.markStop();
      if ((iterIdx - numWarmUps + 1) % data.cudaSyncPeriod == 0) {
        TP_CUDA_CHECK(cudaStreamSynchronize(data.cudaStream.get()));
        measurements.cuda.markStop(data.cudaSyncPeriod);
      }
    }
  }
  printMultiDeviceMeasurements(measurements, data.payloadSize);
  doneProm.set_value();
  return;
#endif // USE_NCCL
  // Only measured (post-warm-up) iterations are timed; the CUDA timer is
  // opened once per `cudaSyncPeriod` round trips.
  if (numWarmUps == 0) {
    measurements.cpu.markStart();
    if (numRoundTrips % data.cudaSyncPeriod == 0) {
      measurements.cuda.markStart();
    }
  }
  // Build the outgoing ping from the expected buffers.
  Message message;
  message.metadata = data.expectedMetadata;
  if (data.payloadSize > 0) {
    for (size_t payloadIdx = 0; payloadIdx < data.numPayloads; payloadIdx++) {
      Message::Payload payload;
      payload.data = data.expectedPayload[payloadIdx].get();
      payload.length = data.payloadSize;
      message.payloads.push_back(std::move(payload));
    }
  } else {
    TP_DCHECK_EQ(message.payloads.size(), 0);
  }
  if (data.tensorSize > 0) {
    for (size_t tensorIdx = 0; tensorIdx < data.numTensors; tensorIdx++) {
      Message::Tensor tensor;
      tensor.length = data.tensorSize;
      if (data.tensorType == TensorType::kCpu) {
        tensor.buffer =
            CpuBuffer{.ptr = data.expectedCpuTensor[tensorIdx].get()};
        tensor.targetDevice = Device(kCpuDeviceType, 0);
      } else if (data.tensorType == TensorType::kCuda) {
        tensor.buffer = CudaBuffer{
            .ptr = data.expectedCudaTensor[tensorIdx].get(),
            .stream = data.cudaStream.get(),
        };
        tensor.targetDevice = Device(kCudaDeviceType, 0);
      } else {
        TP_THROW_ASSERT() << "Unknown tensor type";
      }
      message.tensors.push_back(std::move(tensor));
    }
  } else {
    TP_DCHECK_EQ(message.tensors.size(), 0);
  }
  // Send the ping, then wait for the pong's descriptor and body.
  pipe->write(
      std::move(message),
      [pipe, &numWarmUps, &numRoundTrips, &doneProm, &data, &measurements](
          const Error& error) {
        TP_THROW_ASSERT_IF(error) << error.what();
        pipe->readDescriptor([pipe,
                              &numWarmUps,
                              &numRoundTrips,
                              &doneProm,
                              &data,
                              &measurements](
                                 const Error& error, Descriptor descriptor) {
          TP_THROW_ASSERT_IF(error) << error.what();

          // Point the incoming payloads/tensors at the preallocated
          // temporary buffers, validating metadata and lengths.
          Allocation allocation;
          TP_DCHECK_EQ(descriptor.metadata, data.expectedMetadata);
          if (data.payloadSize > 0) {
            TP_DCHECK_EQ(descriptor.payloads.size(), data.numPayloads);
            allocation.payloads.resize(data.numPayloads);
            for (size_t payloadIdx = 0; payloadIdx < data.numPayloads;
                 payloadIdx++) {
              TP_DCHECK_EQ(
                  descriptor.payloads[payloadIdx].metadata,
                  data.expectedPayloadMetadata[payloadIdx]);
              TP_DCHECK_EQ(
                  descriptor.payloads[payloadIdx].length, data.payloadSize);
              allocation.payloads[payloadIdx].data =
                  data.temporaryPayload[payloadIdx].get();
            }
          } else {
            TP_DCHECK_EQ(descriptor.payloads.size(), 0);
          }
          if (data.tensorSize > 0) {
            TP_DCHECK_EQ(descriptor.tensors.size(), data.numTensors);
            allocation.tensors.resize(data.numTensors);
            for (size_t tensorIdx = 0; tensorIdx < data.numTensors;
                 tensorIdx++) {
              TP_DCHECK_EQ(
                  descriptor.tensors[tensorIdx].metadata,
                  data.expectedTensorMetadata[tensorIdx]);
              TP_DCHECK_EQ(
                  descriptor.tensors[tensorIdx].length, data.tensorSize);
              if (data.tensorType == TensorType::kCpu) {
                allocation.tensors[tensorIdx].buffer = CpuBuffer{
                    .ptr = data.temporaryCpuTensor[tensorIdx].get(),
                };
              } else if (data.tensorType == TensorType::kCuda) {
                allocation.tensors[tensorIdx].buffer = CudaBuffer{
                    .ptr = data.temporaryCudaTensor[tensorIdx].get(),
                    .stream = data.cudaStream.get(),
                };
              } else {
                TP_THROW_ASSERT() << "Unknown tensor type";
              }
            }
          } else {
            TP_DCHECK_EQ(descriptor.tensors.size(), 0);
          }
          pipe->read(
              allocation,
              [pipe,
               &numWarmUps,
               &numRoundTrips,
               &doneProm,
               &data,
               &measurements,
               descriptor{std::move(descriptor)},
               allocation](const Error& error) {
                // Timers are stopped before the error check and content
                // validation so the measurement excludes that overhead.
                if (numWarmUps == 0) {
                  measurements.cpu.markStop();
                  if ((numRoundTrips - 1) % data.cudaSyncPeriod == 0) {
                    TP_CUDA_CHECK(cudaStreamSynchronize(data.cudaStream.get()));
                    measurements.cuda.markStop(data.cudaSyncPeriod);
                  }
                }
                TP_THROW_ASSERT_IF(error) << error.what();
                // Validate the echoed contents against the fixed pattern.
                if (data.payloadSize > 0) {
                  TP_DCHECK_EQ(allocation.payloads.size(), data.numPayloads);
                  for (size_t payloadIdx = 0; payloadIdx < data.numPayloads;
                       payloadIdx++) {
                    TP_DCHECK_EQ(
                        memcmp(
                            allocation.payloads[payloadIdx].data,
                            data.expectedPayload[payloadIdx].get(),
                            descriptor.payloads[payloadIdx].length),
                        0);
                  }
                } else {
                  TP_DCHECK_EQ(allocation.payloads.size(), 0);
                }
                if (data.tensorSize > 0) {
                  TP_DCHECK_EQ(allocation.tensors.size(), data.numTensors);
                  for (size_t tensorIdx = 0; tensorIdx < data.numTensors;
                       tensorIdx++) {
                    if (data.tensorType == TensorType::kCpu) {
                      TP_DCHECK_EQ(
                          memcmp(
                              allocation.tensors[tensorIdx]
                                  .buffer.unwrap<CpuBuffer>()
                                  .ptr,
                              data.expectedCpuTensor[tensorIdx].get(),
                              descriptor.tensors[tensorIdx].length),
                          0);
                    } else if (data.tensorType == TensorType::kCuda) {
                      // No (easy) way to do a memcmp with CUDA, I
                      // believe...
                    } else {
                      TP_THROW_ASSERT() << "Unknown tensor type";
                    }
                  }
                } else {
                  TP_DCHECK_EQ(allocation.tensors.size(), 0);
                }
                // Consume warm-ups first, then measured round trips;
                // recurse or finish.
                if (numWarmUps > 0) {
                  numWarmUps -= 1;
                } else {
                  numRoundTrips -= 1;
                }
                if (numRoundTrips > 0) {
                  clientPingPongNonBlock(
                      pipe,
                      numWarmUps,
                      numRoundTrips,
                      doneProm,
                      data,
                      measurements);
                } else {
                  printMultiDeviceMeasurements(measurements, data.payloadSize);
                  doneProm.set_value();
                }
              });
        });
      });
}

// Start with sending ping
static void runClient(const Options& options) {
  std::string addr = options.address;
  int numWarmUps = kNumWarmUpRounds;
  int numRoundTrips = options.numRoundTrips;

  Data data;
  data.numPayloads = options.numPayloads;
  data.payloadSize = options.payloadSize;
  for (size_t payloadIdx = 0; payloadIdx < options.numPayloads; payloadIdx++) {
    data.expectedPayload.push_back(createFullCpuData(options.payloadSize));
    data.expectedPayloadMetadata.push_back(
        std::string(options.metadataSize, 0x42));
    data.temporaryPayload.push_back(createEmptyCpuData(options.payloadSize));
  }
  data.numTensors = options.numTensors;
  data.tensorSize = options.tensorSize;
  data.tensorType = options.tensorType;
  for (size_t tensorIdx = 0; tensorIdx < options.numTensors; tensorIdx++) {
    data.expectedTensorMetadata.push_back(
        std::string(options.metadataSize, 0x42));
    if (data.tensorType == TensorType::kCpu) {
      data.expectedCpuTensor.push_back(createFullCpuData(options.tensorSize));
      data.temporaryCpuTensor.push_back(createEmptyCpuData(options.tensorSize));
    } else if (data.tensorType == TensorType::kCuda) {
      data.expectedCudaTensor.push_back(createFullCudaData(options.tensorSize));
      data.temporaryCudaTensor.push_back(
          createEmptyCudaData(options.tensorSize));
      data.cudaStream = createCudaStream();
    } else {
      TP_THROW_ASSERT() << "Unknown tensor type";
    }
  }
  data.cudaSyncPeriod = options.cudaSyncPeriod;
  data.expectedMetadata = std::string(options.metadataSize, 0x42);

  MultiDeviceMeasurements measurements;
  measurements.cpu.reserve(options.numRoundTrips);
  measurements.cuda.reserve(options.numRoundTrips / data.cudaSyncPeriod);

  std::shared_ptr<Context> context = std::make_shared<Context>();
  auto transportContext =
      TensorpipeTransportRegistry().create(options.transport);
  validateTransportContext(transportContext);
  context->registerTransport(0, options.transport, transportContext);

  auto channelContext = TensorpipeChannelRegistry().create(options.channel);
  validateChannelContext(channelContext);
  context->registerChannel(0, options.channel, channelContext);

  std::shared_ptr<Pipe> pipe = context->connect(addr);

#if USE_NCCL
  ncclUniqueId uniqueId;
  TP_NCCL_CHECK(ncclGetUniqueId(&uniqueId));
  Message message;
  message.metadata = std::string(
      reinterpret_cast<char*>(&uniqueId),
      reinterpret_cast<char*>(&uniqueId) + sizeof(ncclUniqueId));
  std::promise<void> promise;
  pipe->write(std::move(message), [&](const Error& error) {
    TP_THROW_ASSERT_IF(error) << error.what();
    promise.set_value();
  });
  promise.get_future().get();

  data.ncclComm = createNcclComm(/*rank=*/1, /*worldSize=*/2, uniqueId);
#endif // USE_NCCL

  std::promise<void> doneProm;
  clientPingPongNonBlock(
      std::move(pipe), numWarmUps, numRoundTrips, doneProm, data, measurements);

  doneProm.get_future().get();
  context->join();
}

int main(int argc, char** argv) {
  struct Options x = parseOptions(argc, argv);
  std::cout << "mode = " << x.mode << "\n";
  std::cout << "transport = " << x.transport << "\n";
  std::cout << "channel = " << x.channel << "\n";
  std::cout << "address = " << x.address << "\n";
  std::cout << "num_round_trips = " << x.numRoundTrips << "\n";
  std::cout << "num_payloads = " << x.numPayloads << "\n";
  std::cout << "payload_size = " << x.payloadSize << "\n";
  std::cout << "num_tensors = " << x.numTensors << "\n";
  std::cout << "tensor_size = " << x.tensorSize << "\n";
  std::cout << "tensor_type = "
            << (x.tensorType == TensorType::kCpu ? "cpu" : "cuda") << "\n";
  std::cout << "metadata_size = " << x.metadataSize << "\n";

  if (x.mode == "listen") {
    runServer(x);
  } else if (x.mode == "connect") {
    runClient(x);
  } else {
    // Should never be here
    TP_THROW_ASSERT() << "unknown mode: " << x.mode;
  }

  return 0;
}


================================================
FILE: tensorpipe/benchmark/benchmark_transport.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <cstring>

#include <future>

#include <tensorpipe/benchmark/measurements.h>
#include <tensorpipe/benchmark/options.h>
#include <tensorpipe/benchmark/transport_registry.h>
#include <tensorpipe/common/defs.h>
#include <tensorpipe/transport/connection.h>
#include <tensorpipe/transport/listener.h>

using namespace tensorpipe;
using namespace tensorpipe::benchmark;
using namespace tensorpipe::transport;

// Buffers used by one side of the transport benchmark.
struct Data {
  // Fixed byte pattern that the peer validates received data against.
  std::unique_ptr<uint8_t[]> expected;
  // Scratch buffer that incoming bytes are read into.
  std::unique_ptr<uint8_t[]> temporary;
  // Size in bytes of both buffers.
  size_t size;
};

// Print a summary table of the collected latency samples to stderr:
// average and selected percentiles, all in microseconds.
// Sorts `measurements` in place (percentile() requires sorted samples).
// Assumes at least one sample was recorded (the average divides by size).
static void printMeasurements(Measurements& measurements, size_t dataLen) {
  measurements.sort();
  fprintf(
      stderr,
      "%-15s %-15s %-12s %-7s %-7s %-7s %-7s\n",
      "chunk-size",
      "# ping-pong",
      "avg (usec)",
      "p50",
      "p75",
      "p90",
      "p95");
  // %zu is the portable conversion for size_t; the previous %lu is wrong
  // on platforms where size_t is not unsigned long (e.g. 64-bit Windows).
  fprintf(
      stderr,
      "%-15zu %-15zu %-12.3f %-7.3f %-7.3f %-7.3f %-7.3f\n",
      dataLen,
      measurements.size(),
      measurements.sum().count() / (float)measurements.size() / 1000.0,
      measurements.percentile(0.50).count() / 1000.0,
      measurements.percentile(0.75).count() / 1000.0,
      measurements.percentile(0.90).count() / 1000.0,
      measurements.percentile(0.95).count() / 1000.0);
}

// Allocate `size` bytes filled with a deterministic pattern so that the
// receiving peer can validate the transferred contents byte by byte.
static std::unique_ptr<uint8_t[]> createData(const int size) {
  auto buffer = std::make_unique<uint8_t[]>(size);
  int offset = 0;
  while (offset < size) {
    buffer[offset] = static_cast<uint8_t>((offset >> 8) ^ (offset & 0xff));
    ++offset;
  }
  return buffer;
}

// Server side of the transport benchmark: read one ping into the temporary
// buffer, validate it, echo it back, and repeat until `numRoundTrips`
// reaches zero, at which point `doneProm` is fulfilled.
//
// The reference parameters are owned by runServer(), which blocks on
// `doneProm`, so they remain valid throughout the callback chain.
static void serverPongPingNonBlock(
    std::shared_ptr<Connection> conn,
    int& numRoundTrips,
    std::promise<void>& doneProm,
    Data& data,
    Measurements& measurements) {
  auto onPingRead = [conn, &numRoundTrips, &doneProm, &data, &measurements](
                        const Error& error, const void* ptr, size_t len) {
    TP_THROW_ASSERT_IF(error) << error.what();
    TP_DCHECK_EQ(len, data.size);
    // The client always sends the fixed validation pattern.
    TP_DCHECK_EQ(memcmp(ptr, data.expected.get(), len), 0);
    auto onPongWritten = [conn, &numRoundTrips, &doneProm, &data, &measurements](
                             const Error& error) {
      TP_THROW_ASSERT_IF(error) << error.what();
      numRoundTrips -= 1;
      if (numRoundTrips > 0) {
        serverPongPingNonBlock(
            conn, numRoundTrips, doneProm, data, measurements);
      } else {
        doneProm.set_value();
      }
    };
    conn->write(data.temporary.get(), data.size, std::move(onPongWritten));
  };
  conn->read(data.temporary.get(), data.size, std::move(onPingRead));
}

// Start with receiving ping
static void runServer(const Options& options) {
  std::string addr = options.address;
  int numRoundTrips = options.numRoundTrips;
  Data data = {
      createData(options.payloadSize),
      std::make_unique<uint8_t[]>(options.payloadSize),
      options.payloadSize};
  Measurements measurements;
  measurements.reserve(options.numRoundTrips);

  std::shared_ptr<transport::Context> context;
  context = TensorpipeTransportRegistry().create(options.transport);
  validateTransportContext(context);

  std::promise<std::shared_ptr<Connection>> connProm;
  std::shared_ptr<transport::Listener> listener = context->listen(addr);
  listener->accept([&](const Error& error, std::shared_ptr<Connection> conn) {
    TP_THROW_ASSERT_IF(error) << error.what();
    connProm.set_value(std::move(conn));
  });
  std::shared_ptr<Connection> conn = connProm.get_future().get();

  std::promise<void> doneProm;
  serverPongPingNonBlock(
      std::move(conn), numRoundTrips, doneProm, data, measurements);

  doneProm.get_future().get();
  context->join();
}

static void clientPingPongNonBlock(
    std::shared_ptr<Connection> conn,
    int& numRoundTrips,
    std::promise<void>& doneProm,
    Data& data,
    Measurements& measurements) {
  measurements.markStart();
  conn->write(
      data.expected.get(),
      data.size,
      [conn, &numRoundTrips, &doneProm, &data, &measurements](
          const Error& error) {
        TP_THROW_ASSERT_IF(error) << error.what();
        conn->read(
            data.temporary.get(),
            data.size,
            [conn, &numRoundTrips, &doneProm, &data, &measurements](
                const Error& error, const void* ptr, size_t len) {
              measurements.markStop();
              TP_THROW_ASSERT_IF(error) << error.what();
              TP_DCHECK_EQ(len, data.size);
              TP_DCHECK_EQ(memcmp(ptr, data.expected.get(), len), 0);
              if (--numRoundTrips > 0) {
                clientPingPongNonBlock(
                    conn, numRoundTrips, doneProm, data, measurements);
              } else {
                printMeasurements(measurements, data.size);
                doneProm.set_value();
              }
            });
      });
}

// Start with sending ping
static void runClient(const Options& options) {
  std::string addr = options.address;
  int numRoundTrips = options.numRoundTrips;
  Data data = {
      createData(options.payloadSize),
      std::make_unique<uint8_t[]>(options.payloadSize),
      options.payloadSize};
  Measurements measurements;
  measurements.reserve(options.numRoundTrips);

  std::shared_ptr<transport::Context> context;
  context = TensorpipeTransportRegistry().create(options.transport);
  validateTransportContext(context);
  std::shared_ptr<Connection> conn = context->connect(addr);

  std::promise<void> doneProm;
  clientPingPongNonBlock(
      std::move(conn), numRoundTrips, doneProm, data, measurements);

  doneProm.get_future().get();
  context->join();
}

int main(int argc, char** argv) {
  struct Options x = parseOptions(argc, argv);
  std::cout << "mode = " << x.mode << "\n";
  std::cout << "transport = " << x.transport << "\n";
  std::cout << "address = " << x.address << "\n";
  std::cout << "num_round_trips = " << x.numRoundTrips << "\n";
  std::cout << "payload_size = " << x.payloadSize << "\n";

  if (x.mode == "listen") {
    runServer(x);
  } else if (x.mode == "connect") {
    runClient(x);
  } else {
    // Should never be here
    TP_THROW_ASSERT() << "unknown mode: " << x.mode;
  }

  return 0;
}


================================================
FILE: tensorpipe/benchmark/channel_registry.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <tensorpipe/benchmark/channel_registry.h>

#include <tensorpipe/tensorpipe.h>
#include <tensorpipe/tensorpipe_cuda.h>

// Registry mapping channel names (e.g. "basic", "cma") to factory
// functions that produce channel contexts; entries are added below via
// TP_REGISTER_CREATOR.
TP_DEFINE_SHARED_REGISTRY(
    TensorpipeChannelRegistry,
    tensorpipe::channel::Context);

// BASIC

// Factory registered under the "basic" key.
std::shared_ptr<tensorpipe::channel::Context> makeBasicChannel() {
  return tensorpipe::channel::basic::create();
}

TP_REGISTER_CREATOR(TensorpipeChannelRegistry, basic, makeBasicChannel);

// CMA

// Factory registered under the "cma" key; only compiled in when CMA
// support was detected at build time.
#if TENSORPIPE_HAS_CMA_CHANNEL
std::shared_ptr<tensorpipe::channel::Context> makeCmaChannel() {
  return tensorpipe::channel::cma::create();
}

TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cma, makeCmaChannel);
#endif // TENSORPIPE_HAS_CMA_CHANNEL

// MPT

// The mpt channel cannot be constructed without arguments, so its registry
// entry always fails loudly when selected.
std::shared_ptr<tensorpipe::channel::Context> makeMptChannel() {
  // Fixed typo in the message: the channel is registered as "mpt", the
  // previous text said "mtp".
  throw std::runtime_error("mpt channel requires arguments");
}

TP_REGISTER_CREATOR(TensorpipeChannelRegistry, mpt, makeMptChannel);

// XTH

// Factory registered under the "xth" key.
std::shared_ptr<tensorpipe::channel::Context> makeXthChannel() {
  return tensorpipe::channel::xth::create();
}

TP_REGISTER_CREATOR(TensorpipeChannelRegistry, xth, makeXthChannel);

// CUDA XTH

// Factory registered under the "cuda_xth" key.
std::shared_ptr<tensorpipe::channel::Context> makeCudaXthChannel() {
  return tensorpipe::channel::cuda_xth::create();
}

TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_xth, makeCudaXthChannel);

// CUDA BASIC

// Factory registered under the "cuda_basic" key; the cuda_basic channel
// wraps an inner basic channel instance.
std::shared_ptr<tensorpipe::channel::Context> makeCudaBasicChannel() {
  return tensorpipe::channel::cuda_basic::create(
      tensorpipe::channel::basic::create());
}

TP_REGISTER_CREATOR(
    TensorpipeChannelRegistry,
    cuda_basic,
    makeCudaBasicChannel);

// CUDA IPC

// Factory registered under the "cuda_ipc" key; only compiled in when CUDA
// IPC support was detected at build time.
#if TENSORPIPE_HAS_CUDA_IPC_CHANNEL
std::shared_ptr<tensorpipe::channel::Context> makeCudaIpcChannel() {
  return tensorpipe::channel::cuda_ipc::create();
}

TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_ipc, makeCudaIpcChannel);
#endif // TENSORPIPE_HAS_CUDA_IPC_CHANNEL

// CUDA GDR

// Factory registered under the "cuda_gdr" key; only compiled in when CUDA
// GDR support was detected at build time.
#if TENSORPIPE_HAS_CUDA_GDR_CHANNEL
std::shared_ptr<tensorpipe::channel::Context> makeCudaGdrChannel() {
  return tensorpipe::channel::cuda_gdr::create();
}

TP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_gdr, makeCudaGdrChannel);
#endif // TENSORPIPE_HAS_CUDA_GDR_CHANNEL

// Exit the process with a diagnostic listing the registered channel names
// when `context` is null (i.e. the requested channel did not resolve).
void validateChannelContext(
    std::shared_ptr<tensorpipe::channel::Context> context) {
  if (context) {
    return;
  }
  std::cout
      << "The channel you passed in is not supported. The following channels are valid: ";
  for (const auto& key : TensorpipeChannelRegistry().keys()) {
    std::cout << key << ", ";
  }
  std::cout << "\n";
  exit(EXIT_FAILURE);
}


================================================
FILE: tensorpipe/benchmark/channel_registry.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <tensorpipe/benchmark/registry.h>
#include <tensorpipe/channel/context.h>

// Shared registry of channel-context factories, keyed by channel name.
TP_DECLARE_SHARED_REGISTRY(
    TensorpipeChannelRegistry,
    tensorpipe::channel::Context);

// Exits the process with a diagnostic listing valid channel names when
// `context` is null (i.e. the requested channel was not found).
void validateChannelContext(
    std::shared_ptr<tensorpipe::channel::Context> context);


================================================
FILE: tensorpipe/benchmark/measurements.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <algorithm>
#include <chrono>
#include <vector>

namespace tensorpipe {
namespace benchmark {

// Collects duration samples for benchmarking and provides simple summary
// statistics (sum and percentiles) over them.
class Measurements {
  using clock = std::chrono::high_resolution_clock;
  using nanoseconds = std::chrono::nanoseconds;

 public:
  // Records the start time of the current measurement.
  void markStart() {
    start_ = clock::now();
  }

  // Records one sample: the time elapsed since markStart(), divided by
  // `count` so a batched measurement yields a per-item duration.
  void markStop(size_t count = 1) {
    samples_.push_back((clock::now() - start_) / count);
  }

  // Sorts samples ascending; must be called before percentile().
  void sort() {
    std::sort(samples_.begin(), samples_.end());
  }

  void reserve(size_t capacity) {
    samples_.reserve(capacity);
  }

  size_t size() const {
    return samples_.size();
  }

  // Total of all recorded samples.
  nanoseconds sum() const {
    nanoseconds sum{0};
    for (const auto& sample : samples_) {
      sum += sample;
    }
    return sum;
  }

  // Returns the f-th percentile (f in [0, 1]) of the (sorted) samples.
  // The index is clamped so that f == 1.0 returns the maximum sample instead
  // of reading one past the end of the vector, and an empty sample set yields
  // zero instead of undefined behavior.
  nanoseconds percentile(float f) const {
    if (samples_.empty()) {
      return nanoseconds{0};
    }
    size_t index = static_cast<size_t>(f * samples_.size());
    index = std::min(index, samples_.size() - 1);
    return samples_[index];
  }

 private:
  clock::time_point start_;
  std::vector<nanoseconds> samples_;
};

} // namespace benchmark
} // namespace tensorpipe


================================================
FILE: tensorpipe/benchmark/options.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <tensorpipe/benchmark/options.h>

#include <getopt.h>
#include <stdio.h>
#include <stdlib.h>

namespace tensorpipe {
namespace benchmark {

// Prints usage information and exits with `status`. On failure only a short
// pointer to --help is printed (to stderr); on success the full option list
// is printed. Never returns.
static void usage(int status, const char* argv0) {
  if (status != EXIT_SUCCESS) {
    fprintf(stderr, "`%s --help' for more information.\n", argv0);
    exit(status);
  }

  fprintf(stderr, "Usage: %s [OPTIONS]\n", argv0);
// Helper to emit one help line to stderr.
#define X(x) fputs(x "\n", stderr);
  X("");
  X("--mode=MODE                      Running mode [listen|connect]");
  X("--transport=TRANSPORT            Transport backend [shm|uv]");
  X("--channel=CHANNEL                Channel backend [basic]");
  X("--address=ADDRESS                Address to listen or connect to");
  X("--num-round-trips=NUM            Number of write/read pairs to perform");
  X("--num-payloads=NUM [optional]    Number of payloads of each write/read pair");
  X("--payload-size=SIZE [optional]   Size of payload of each write/read pair");
  X("--num-tensors=NUM [optional]     Number of tensors of each write/read pair");
  X("--tensor-size=SIZE [optional]    Size of tensor of each write/read pair");
  X("--tensor-type=TYPE [optional]    Type of tensor (cpu or cuda)");
  X("--metadata-size=SIZE [optional]  Size of metadata of each write/read pair");
  // Fixed typo: "[optiona]" -> "[optional]".
  X("--cuda-sync-period=NUM [optional] Number of round-trips between two stream syncs");

  exit(status);
}

static void validateOptions(Options options, const char* argv0) {
  int status = EXIT_SUCCESS;
  if (options.mode.empty()) {
    fprintf(stderr, "Missing argument: --mode must be set\n");
    status = EXIT_FAILURE;
  }
  if (options.transport.empty()) {
    fprintf(stderr, "Missing argument: --transport must be set\n");
    status = EXIT_FAILURE;
  }
  if (options.address.empty()) {
    fprintf(stderr, "Missing argument: --address must be set\n");
    status = EXIT_FAILURE;
  }
  if (options.numRoundTrips <= 0) {
    fprintf(stderr, "Missing argument: --num-round-trips must be set\n");
    status = EXIT_FAILURE;
  }
  if (status != EXIT_SUCCESS) {
    usage(status, argv0);
  }
}

// Parses command-line arguments into an Options struct.
//
// All options are long options (no short flags). Each longOptions entry
// points its flag member at `flag`, so getopt_long() writes the matched
// option's Flags value into `flag` and returns 0; the switch below then
// dispatches on `flag`. Invalid input or --help exits via usage().
struct Options parseOptions(int argc, char** argv) {
  struct Options options;
  int opt;
  // Receives the Flags value of the option matched by getopt_long().
  int flag = -1;

  enum Flags : int {
    MODE,
    TRANSPORT,
    CHANNEL,
    ADDRESS,
    NUM_ROUND_TRIPS,
    NUM_PAYLOADS,
    PAYLOAD_SIZE,
    NUM_TENSORS,
    TENSOR_SIZE,
    TENSOR_TYPE,
    METADATA_SIZE,
    CUDA_SYNC_PERIOD,
    HELP,
  };

  static struct option longOptions[] = {
      {"mode", required_argument, &flag, MODE},
      {"transport", required_argument, &flag, TRANSPORT},
      {"channel", required_argument, &flag, CHANNEL},
      {"address", required_argument, &flag, ADDRESS},
      {"num-round-trips", required_argument, &flag, NUM_ROUND_TRIPS},
      {"num-payloads", required_argument, &flag, NUM_PAYLOADS},
      {"payload-size", required_argument, &flag, PAYLOAD_SIZE},
      {"num-tensors", required_argument, &flag, NUM_TENSORS},
      {"tensor-size", required_argument, &flag, TENSOR_SIZE},
      {"tensor-type", required_argument, &flag, TENSOR_TYPE},
      {"metadata-size", required_argument, &flag, METADATA_SIZE},
      {"cuda-sync-period", required_argument, &flag, CUDA_SYNC_PERIOD},
      {"help", no_argument, &flag, HELP},
      {nullptr, 0, nullptr, 0}};

  while (1) {
    opt = getopt_long(argc, argv, "", longOptions, nullptr);
    if (opt == -1) {
      // No more options to process.
      break;
    }
    if (opt != 0) {
      // getopt_long() returns 0 for flag-setting long options; any other
      // value means an unrecognized or malformed option.
      usage(EXIT_FAILURE, argv[0]);
      break;
    }
    switch (flag) {
      case MODE:
        options.mode = std::string(optarg);
        if (options.mode != "listen" && options.mode != "connect") {
          fprintf(stderr, "Error:\n");
          fprintf(stderr, "  --mode must be [listen|connect]\n");
          exit(EXIT_FAILURE);
        }
        break;
      case TRANSPORT:
        options.transport = std::string(optarg);
        break;
      case CHANNEL:
        options.channel = std::string(optarg);
        break;
      case ADDRESS:
        options.address = std::string(optarg);
        break;
      case NUM_ROUND_TRIPS:
        options.numRoundTrips = std::strtol(optarg, nullptr, 10);
        break;
      case NUM_PAYLOADS:
        options.numPayloads = std::strtoull(optarg, nullptr, 10);
        break;
      case PAYLOAD_SIZE:
        options.payloadSize = std::strtoull(optarg, nullptr, 10);
        break;
      case NUM_TENSORS:
        options.numTensors = std::strtoull(optarg, nullptr, 10);
        break;
      case TENSOR_SIZE:
        options.tensorSize = std::strtoull(optarg, nullptr, 10);
        break;
      case TENSOR_TYPE:
        if (std::string(optarg) == "cpu") {
          options.tensorType = TensorType::kCpu;
        } else if (std::string(optarg) == "cuda") {
          options.tensorType = TensorType::kCuda;
        } else {
          fprintf(stderr, "Error:\n");
          fprintf(stderr, "  --tensor-type must be [cpu|cuda]\n");
          exit(EXIT_FAILURE);
        }
        break;
      case METADATA_SIZE:
        options.metadataSize = std::strtoull(optarg, nullptr, 10);
        break;
      case CUDA_SYNC_PERIOD:
        options.cudaSyncPeriod = std::strtoull(optarg, nullptr, 10);
        break;
      case HELP:
        // usage() exits the process when called with EXIT_SUCCESS.
        usage(EXIT_SUCCESS, argv[0]);
        break;
      default:
        usage(EXIT_FAILURE, argv[0]);
        break;
    }
  }

  // Exits via usage() if any required option is missing.
  validateOptions(options, argv[0]);

  return options;
}

} // namespace benchmark
} // namespace tensorpipe


================================================
FILE: tensorpipe/benchmark/options.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <string>

#include <tensorpipe/channel/context.h>
#include <tensorpipe/transport/context.h>

namespace tensorpipe {
namespace benchmark {

// Whether benchmark tensors live in CPU or CUDA memory.
enum class TensorType {
  kCpu,
  kCuda,
};

// Parsed command-line configuration for the benchmark tools.
struct Options {
  std::string mode; // "listen" or "connect" (validated in parseOptions)
  std::string transport; // shm or uv
  std::string channel; // basic
  std::string address; // address for listen or connect
  int numRoundTrips{0}; // number of write/read pairs
  size_t numPayloads{0};
  size_t payloadSize{0};
  size_t numTensors{0};
  size_t tensorSize{0};
  TensorType tensorType{TensorType::kCpu};
  size_t metadataSize{0};
  size_t cudaSyncPeriod{1}; // round-trips between two CUDA stream syncs
};

// Parses argv into an Options struct, exiting with a usage message on
// invalid or missing arguments.
struct Options parseOptions(int argc, char** argv);

} // namespace benchmark
} // namespace tensorpipe


================================================
FILE: tensorpipe/benchmark/registry.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

// NB: This Registry works poorly when you have other namespaces.

/**
 * Simple registry implementation that uses static variables to
 * register object creators during program initialization time. This registry
 * implementation is largely borrowed from the PyTorch registry utility in file
 * pytorch/c10/util/Registry.h.
 */

#pragma once

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

namespace tensorpipe {

/**
 * @brief A template class that allows one to register classes by keys.
 *
 * The keys are usually a std::string specifying the name, but can be anything
 * that can be used in a std::map.
 *
 * You should most likely not use the Registry class explicitly, but use the
 * helper macros below to declare specific registries as well as registering
 * objects.
 */
template <class ObjectPtrType, class... Args>
class Registry {
 public:
  typedef std::function<ObjectPtrType(Args...)> Creator;

  Registry() : registry_() {}

  // Adds a key and its associated creator to the registry. If the key
  // already exists, the old creator is simply replaced.
  void registerCreator(std::string key, Creator creator) {
    registry_[key] = creator;
  }

  // Registers a key/Creator pair along with a help message for the key.
  void registerCreator(
      std::string key,
      Creator creator,
      const std::string& helpMsg) {
    registerCreator(key, creator);
    helpMessage_[key] = helpMsg;
  }

  // Returns whether a particular key exists in the registry.
  inline bool has(std::string key) {
    return (registry_.count(key) != 0);
  }

  // Invokes the creator registered under `key` with the provided args and
  // returns the object it constructs, or nullptr if the key is not
  // registered.
  ObjectPtrType create(std::string key, Args... args) {
    // Single lookup instead of count() followed by operator[].
    auto it = registry_.find(key);
    if (it == registry_.end()) {
      return nullptr;
    }
    return it->second(args...);
  }

  // Returns the registered keys as a std::vector.
  std::vector<std::string> keys() const {
    std::vector<std::string> keys;
    keys.reserve(registry_.size());
    for (const auto& it : registry_) {
      keys.push_back(it.first);
    }
    return keys;
  }

  // Returns the full key -> help message map.
  inline const std::unordered_map<std::string, std::string>& helpMessage()
      const {
    return helpMessage_;
  }

  // Returns the help message for `key`, or nullptr if none was provided.
  const char* helpMessage(std::string key) const {
    auto it = helpMessage_.find(key);
    if (it == helpMessage_.end()) {
      return nullptr;
    }
    return it->second.c_str();
  }

 private:
  std::unordered_map<std::string, Creator> registry_;
  std::unordered_map<std::string, std::string> helpMessage_;
};

// Registerer is a class template that simplifies Register-ing keys for a
// given registry: constructing a (typically static) Registerer performs the
// registration as a side effect during program initialization.
template <class ObjectPtrType, class... Args>
class Registerer {
 public:
  explicit Registerer(
      std::string key,
      Registry<ObjectPtrType, Args...>& registry,
      typename Registry<ObjectPtrType, Args...>::Creator creator,
      const std::string& helpMsg = "") {
    registry.registerCreator(key, creator, helpMsg);
  }
};

// The following macros should be used to create/add to registries. Avoid
// invoking the Registry class template functions directly.

// Token-pasting helpers used to generate a unique variable name per line
// (via __LINE__) for the static Registerer instances below.
#define TP_CONCATENATE_IMPL(s1, s2) s1##s2
#define TP_CONCATENATE(s1, s2) TP_CONCATENATE_IMPL(s1, s2)
#define TP_ANONYMOUS_VARIABLE(str) TP_CONCATENATE(str, __LINE__)

// Using the construct on first use idiom to avoid static order initialization
// issue. Refer to this link for reference:
// https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use
#define TP_DEFINE_TYPED_REGISTRY(RegistryName, ObjectType, PtrType, ...)     \
  tensorpipe::Registry<PtrType<ObjectType>, ##__VA_ARGS__>& RegistryName() { \
    static tensorpipe::Registry<PtrType<ObjectType>, ##__VA_ARGS__>*         \
        registry =                                                           \
            new tensorpipe::Registry<PtrType<ObjectType>, ##__VA_ARGS__>();  \
    return *registry;                                                        \
  }

// Declares the accessor function defined by TP_DEFINE_TYPED_REGISTRY and a
// registry-specific Registerer typedef used by TP_REGISTER_TYPED_CREATOR.
#define TP_DECLARE_TYPED_REGISTRY(RegistryName, ObjectType, PtrType, ...)   \
  tensorpipe::Registry<PtrType<ObjectType>, ##__VA_ARGS__>& RegistryName(); \
  typedef tensorpipe::Registerer<PtrType<ObjectType>, ##__VA_ARGS__>        \
      Registerer##RegistryName

// Convenience wrappers for registries whose values are std::shared_ptr.
#define TP_DEFINE_SHARED_REGISTRY(RegistryName, ObjectType, ...) \
  TP_DEFINE_TYPED_REGISTRY(                                      \
      RegistryName, ObjectType, std::shared_ptr, ##__VA_ARGS__)

#define TP_DECLARE_SHARED_REGISTRY(RegistryName, ObjectType, ...) \
  TP_DECLARE_TYPED_REGISTRY(                                      \
      RegistryName, ObjectType, std::shared_ptr, ##__VA_ARGS__)

// Registers a creator under `key` by instantiating a file-scope Registerer
// with a line-unique name.
#define TP_REGISTER_TYPED_CREATOR(RegistryName, key, ...)                  \
  static Registerer##RegistryName TP_ANONYMOUS_VARIABLE(g_##RegistryName)( \
      key, RegistryName(), ##__VA_ARGS__);

// Same as above, but stringizes the (unquoted) key token.
#define TP_REGISTER_CREATOR(RegistryName, key, ...) \
  TP_REGISTER_TYPED_CREATOR(RegistryName, #key, __VA_ARGS__)

} // namespace tensorpipe


================================================
FILE: tensorpipe/benchmark/transport_registry.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <tensorpipe/benchmark/transport_registry.h>

#include <tensorpipe/tensorpipe.h>

TP_DEFINE_SHARED_REGISTRY(
    TensorpipeTransportRegistry,
    tensorpipe::transport::Context);

// IBV

#if TENSORPIPE_HAS_IBV_TRANSPORT
// Factory wrapper with the exact signature the registry's Creator expects.
std::shared_ptr<tensorpipe::transport::Context> makeIbvContext() {
  auto context = tensorpipe::transport::ibv::create();
  return context;
}

TP_REGISTER_CREATOR(TensorpipeTransportRegistry, ibv, makeIbvContext);
#endif // TENSORPIPE_HAS_IBV_TRANSPORT

// SHM

#if TENSORPIPE_HAS_SHM_TRANSPORT
// Factory wrapper with the exact signature the registry's Creator expects.
std::shared_ptr<tensorpipe::transport::Context> makeShmContext() {
  auto context = tensorpipe::transport::shm::create();
  return context;
}

TP_REGISTER_CREATOR(TensorpipeTransportRegistry, shm, makeShmContext);
#endif // TENSORPIPE_HAS_SHM_TRANSPORT

// UV

// Factory wrapper for the libuv transport; unconditionally registered (no
// compile-time guard, unlike ibv/shm).
std::shared_ptr<tensorpipe::transport::Context> makeUvContext() {
  auto context = tensorpipe::transport::uv::create();
  return context;
}

TP_REGISTER_CREATOR(TensorpipeTransportRegistry, uv, makeUvContext);

// Aborts the process with an explanatory message when the requested transport
// could not be created (i.e., the registry returned a null context). The list
// of valid keys is read from the registry so the message stays in sync with
// whatever transports were compiled in. No-op when `context` is non-null.
void validateTransportContext(
    std::shared_ptr<tensorpipe::transport::Context> context) {
  if (!context) {
    auto keys = TensorpipeTransportRegistry().keys();
    // Report on stderr, consistent with the rest of the benchmark CLI
    // (options.cc uses fprintf(stderr, ...) for its errors).
    std::cerr
        << "The transport you passed in is not supported. The following transports are valid: ";
    // Join with ", " without a dangling trailing separator.
    for (size_t i = 0; i < keys.size(); ++i) {
      if (i > 0) {
        std::cerr << ", ";
      }
      std::cerr << keys[i];
    }
    std::cerr << "\n";
    exit(EXIT_FAILURE);
  }
}


================================================
FILE: tensorpipe/benchmark/transport_registry.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <tensorpipe/benchmark/registry.h>
#include <tensorpipe/transport/context.h>

// Global registry mapping transport names (e.g. "uv", "shm", "ibv") to
// factory functions returning shared_ptr<tensorpipe::transport::Context>.
TP_DECLARE_SHARED_REGISTRY(
    TensorpipeTransportRegistry,
    tensorpipe::transport::Context);

// Exits the process with an explanatory message listing the valid transport
// keys when `context` is null (i.e., the requested transport was not found in
// the registry); does nothing otherwise.
void validateTransportContext(
    std::shared_ptr<tensorpipe::transport::Context> context);


================================================
FILE: tensorpipe/channel/basic/channel_impl.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <tensorpipe/channel/basic/channel_impl.h>

#include <memory>
#include <string>
#include <utility>

#include <tensorpipe/channel/basic/context_impl.h>
#include <tensorpipe/common/cpu_buffer.h>
#include <tensorpipe/common/defs.h>
#include <tensorpipe/common/error.h>
#include <tensorpipe/transport/connection.h>

namespace tensorpipe {
namespace channel {
namespace basic {

// Forwards the token, context, and id to the boilerplate base and stores the
// transport connection over which this channel performs all reads and writes.
ChannelImpl::ChannelImpl(
    ConstructorToken token,
    std::shared_ptr<ContextImpl> context,
    std::string id,
    std::shared_ptr<transport::Connection> connection)
    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(
          token,
          std::move(context),
          std::move(id)),
      connection_(std::move(connection)) {}

// Registers this channel with its owning context; paired with the unenroll()
// call in handleErrorImpl().
void ChannelImpl::initImplFromLoop() {
  context_->enroll(*this);
}

void ChannelImpl::sendImplFromLoop(
    uint64_t sequenceNumber,
    Buffer buffer,
    size_t length,
    TSendCallback callback) {
  SendOpIter opIter = sendOps_.emplaceBack(sequenceNumber);
  SendOperation& op = *opIter;
  op.ptr = buffer.unwrap<CpuBuffer>().ptr;
  op.length = length;
  op.callback = std::move(callback);

  sendOps_.advanceOperation(opIter);
}

// Drives one send operation through its state machine. Each attemptTransition
// call fires only when the operation is in the given `from` state and `cond`
// holds; `prevOpState` gates transitions on the preceding operation's
// progress.
void ChannelImpl::advanceSendOperation(
    SendOpIter opIter,
    SendOperation::State prevOpState) {
  TP_DCHECK(context_->inLoop());

  SendOperation& op = *opIter;

  // Short-circuit: if the channel is in error or there is nothing to write,
  // complete the operation immediately.
  sendOps_.attemptTransition(
      opIter,
      /*from=*/SendOperation::UNINITIALIZED,
      /*to=*/SendOperation::FINISHED,
      /*cond=*/error_ || op.length == 0,
      /*actions=*/{&ChannelImpl::callSendCallback});

  // Needs to go after previous op to ensure predictable and consistent ordering
  // of write calls on the connection.
  sendOps_.attemptTransition(
      opIter,
      /*from=*/SendOperation::UNINITIALIZED,
      /*to=*/SendOperation::WRITING,
      /*cond=*/!error_ && prevOpState >= SendOperation::WRITING,
      /*actions=*/{&ChannelImpl::write});

  // Once the transport write completes (doneWriting set by write()'s
  // callback), finish and notify the user.
  sendOps_.attemptTransition(
      opIter,
      /*from=*/SendOperation::WRITING,
      /*to=*/SendOperation::FINISHED,
      /*cond=*/op.doneWriting,
      /*actions=*/{&ChannelImpl::callSendCallback});
}

// Starts an asynchronous write of the operation's payload on the transport
// connection. The wrapped callback marks the op as done and re-advances the
// state machine when the write completes.
void ChannelImpl::write(SendOpIter opIter) {
  SendOperation& op = *opIter;

  TP_VLOG(6) << "Channel " << id_ << " is writing payload (#"
             << op.sequenceNumber << ")";
  connection_->write(
      op.ptr, op.length, callbackWrapper_([opIter](ChannelImpl& impl) {
        TP_VLOG(6) << "Channel " << impl.id_ << " done writing payload (#"
                   << opIter->sequenceNumber << ")";
        opIter->doneWriting = true;
        impl.sendOps_.advanceOperation(opIter);
      }));
}

// Invokes the user's send callback with the channel's current error (if any),
// then drops the callback so any resources it captured are released promptly.
void ChannelImpl::callSendCallback(SendOpIter opIter) {
  opIter->callback(error_);
  opIter->callback = nullptr;
}

void ChannelImpl::recvImplFromLoop(
    uint64_t sequenceNumber,
    Buffer buffer,
    size_t length,
    TRecvCallback callback) {
  RecvOpIter opIter = recvOps_.emplaceBack(sequenceNumber);
  RecvOperation& op = *opIter;
  op.ptr = buffer.unwrap<CpuBuffer>().ptr;
  op.length = length;
  op.callback = std::move(callback);

  recvOps_.advanceOperation(opIter);
}

// Drives one recv operation through its state machine; mirrors
// advanceSendOperation with reads in place of writes.
void ChannelImpl::advanceRecvOperation(
    RecvOpIter opIter,
    RecvOperation::State prevOpState) {
  TP_DCHECK(context_->inLoop());

  RecvOperation& op = *opIter;

  // Short-circuit: if the channel is in error or there is nothing to read,
  // complete the operation immediately.
  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::UNINITIALIZED,
      /*to=*/RecvOperation::FINISHED,
      /*cond=*/error_ || op.length == 0,
      /*actions=*/{&ChannelImpl::callRecvCallback});

  // Needs to go after previous op to ensure predictable and consistent ordering
  // of read calls on the connection.
  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::UNINITIALIZED,
      /*to=*/RecvOperation::READING,
      /*cond=*/!error_ && prevOpState >= RecvOperation::READING,
      /*actions=*/{&ChannelImpl::read});

  // Once the transport read completes (doneReading set by read()'s callback),
  // finish and notify the user.
  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::READING,
      /*to=*/RecvOperation::FINISHED,
      /*cond=*/op.doneReading,
      /*actions=*/{&ChannelImpl::callRecvCallback});
}

// Starts an asynchronous read of the operation's payload from the transport
// connection into the user-provided buffer. The wrapped callback marks the op
// as done and re-advances the state machine when the read completes.
void ChannelImpl::read(RecvOpIter opIter) {
  RecvOperation& op = *opIter;

  TP_VLOG(6) << "Channel " << id_ << " is reading payload (#"
             << op.sequenceNumber << ")";
  connection_->read(
      op.ptr,
      op.length,
      callbackWrapper_([opIter](
                           ChannelImpl& impl,
                           const void* /* unused */,
                           size_t /* unused */) {
        TP_VLOG(6) << "Channel " << impl.id_ << " done reading payload (#"
                   << opIter->sequenceNumber << ")";
        opIter->doneReading = true;
        impl.recvOps_.advanceOperation(opIter);
      }));
}

// Invokes the user's recv callback with the channel's current error (if any),
// then drops the callback so any resources it captured are released promptly.
void ChannelImpl::callRecvCallback(RecvOpIter opIter) {
  opIter->callback(error_);
  opIter->callback = nullptr;
}

// Entered once the channel is in an error state: let every pending operation
// observe the error, abort in-flight transport I/O, and detach from the
// context.
void ChannelImpl::handleErrorImpl() {
  // With error_ set, these advance calls move pending ops to FINISHED.
  sendOps_.advanceAllOperations();
  recvOps_.advanceAllOperations();

  // Close the connection so that all current operations will be aborted. This
  // will cause their callbacks to be invoked, and only then we'll invoke ours.
  connection_->close();

  // Paired with the enroll() in initImplFromLoop().
  context_->unenroll(*this);
}

} // namespace basic
} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/basic/channel_impl.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <memory>
#include <string>

#include <tensorpipe/channel/channel_impl_boilerplate.h>
#include <tensorpipe/common/state_machine.h>
#include <tensorpipe/transport/context.h>

namespace tensorpipe {
namespace channel {
namespace basic {

class ContextImpl;

// State capturing a single send operation.
struct SendOperation {
  enum State { UNINITIALIZED, WRITING, FINISHED };

  // Fields used by the state machine
  uint64_t sequenceNumber{0};
  State state{UNINITIALIZED};

  // Progress flags
  bool doneWriting{false};

  // Arguments at creation. Default-initialized so a freshly constructed
  // operation never carries indeterminate pointer/length values.
  const void* ptr{nullptr};
  size_t length{0};
  TSendCallback callback;
};

// State capturing a single recv operation.
struct RecvOperation {
  enum State { UNINITIALIZED, READING, FINISHED };

  // Fields used by the state machine
  uint64_t sequenceNumber{0};
  State state{UNINITIALIZED};

  // Progress flags
  bool doneReading{false};

  // Arguments at creation. Default-initialized so a freshly constructed
  // operation never carries indeterminate pointer/length values.
  void* ptr{nullptr};
  size_t length{0};
  TRecvCallback callback;
};

// Implementation of the "basic" channel: transfers CPU buffers by writing and
// reading them directly on a single transport connection, with per-direction
// state machines to keep operations ordered.
class ChannelImpl final
    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {
 public:
  ChannelImpl(
      ConstructorToken token,
      std::shared_ptr<ContextImpl> context,
      std::string id,
      std::shared_ptr<transport::Connection> connection);

 protected:
  // Implement the entry points called by ChannelImplBoilerplate.
  void initImplFromLoop() override;
  void sendImplFromLoop(
      uint64_t sequenceNumber,
      Buffer buffer,
      size_t length,
      TSendCallback callback) override;
  void recvImplFromLoop(
      uint64_t sequenceNumber,
      Buffer buffer,
      size_t length,
      TRecvCallback callback) override;
  void handleErrorImpl() override;

 private:
  // The transport connection carrying all payload reads and writes.
  const std::shared_ptr<transport::Connection> connection_;

  OpsStateMachine<ChannelImpl, SendOperation> sendOps_{
      *this,
      &ChannelImpl::advanceSendOperation};
  using SendOpIter = decltype(sendOps_)::Iter;
  OpsStateMachine<ChannelImpl, RecvOperation> recvOps_{
      *this,
      &ChannelImpl::advanceRecvOperation};
  using RecvOpIter = decltype(recvOps_)::Iter;

  // State machines for send and recv ops.
  void advanceSendOperation(
      SendOpIter opIter,
      SendOperation::State prevOpState);
  void advanceRecvOperation(
      RecvOpIter opIter,
      RecvOperation::State prevOpState);

  // Actions (i.e., methods that begin a state transition).
  // For send operations:
  void write(SendOpIter opIter);
  void callSendCallback(SendOpIter opIter);
  // For recv operations:
  void read(RecvOpIter opIter);
  void callRecvCallback(RecvOpIter opIter);
};

} // namespace basic
} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/basic/context_impl.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <tensorpipe/channel/basic/context_impl.h>

#include <functional>
#include <utility>

#include <tensorpipe/channel/basic/channel_impl.h>

namespace tensorpipe {
namespace channel {
namespace basic {

// The basic channel works with any CPU device, hence a single catch-all
// ("any") descriptor for the CPU device type.
std::shared_ptr<ContextImpl> ContextImpl::create() {
  return std::make_shared<ContextImpl>(
      std::unordered_map<Device, std::string>{
          {Device{kCpuDeviceType, 0}, "any"}});
}

// Passes the device descriptors straight through to the boilerplate base;
// this context holds no additional construction-time state of its own.
ContextImpl::ContextImpl(
    std::unordered_map<Device, std::string> deviceDescriptors)
    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(
          std::move(deviceDescriptors)) {}

// Creates a basic channel from the provided transport connections; only the
// first connection is consumed (the count is checked against
// numConnectionsNeeded()). The endpoint role is not needed by this channel.
std::shared_ptr<Channel> ContextImpl::createChannel(
    std::vector<std::shared_ptr<transport::Connection>> connections,
    Endpoint /* unused */) {
  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());
  return createChannelInternal(std::move(connections[0]));
}

// The basic channel context holds no extra resources, so the error and join
// entry points from ContextImplBoilerplate need no additional work here.
void ContextImpl::handleErrorImpl() {}

void ContextImpl::joinImpl() {}

// Delegates to the on-demand executor to report whether the caller is
// currently running on this context's loop. (Also drops the stray semicolon
// that followed the function body at namespace scope.)
bool ContextImpl::inLoop() const {
  return loop_.inLoop();
}

// Schedules `fn` to run on this context's on-demand executor. (Also drops the
// stray semicolon that followed the function body at namespace scope.)
void ContextImpl::deferToLoop(std::function<void()> fn) {
  loop_.deferToLoop(std::move(fn));
}

} // namespace basic
} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/basic/context_impl.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <functional>

#include <tensorpipe/channel/context_impl_boilerplate.h>
#include <tensorpipe/common/deferred_executor.h>
#include <tensorpipe/common/device.h>

namespace tensorpipe {
namespace channel {
namespace basic {

class ChannelImpl;

class ContextImpl final
    : public ContextImplBoilerplate<ContextImpl, ChannelImpl> {
 public:
  static std::shared_ptr<ContextImpl> create();

  explicit ContextImpl(
      std::unordered_map<Device, std::string> deviceDescriptors);

  std::shared_ptr<Channel> createChannel(
      std::vector<std::shared_ptr<transport::Connection>> connections,
      Endpoint endpoint);

  // Implement the DeferredExecutor interface.
  bool inLoop() const override;
  void deferToLoop(std::function<void()> fn) override;

 protected:
  // Implement the entry points called by ContextImplBoilerplate.
  void handleErrorImpl() override;
  void joinImpl() override;

 private:
  OnDemandDeferredExecutor loop_;
};

} // namespace basic
} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/basic/factory.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <tensorpipe/channel/basic/factory.h>

#include <tensorpipe/channel/basic/channel_impl.h>
#include <tensorpipe/channel/basic/context_impl.h>
#include <tensorpipe/channel/context_boilerplate.h>

namespace tensorpipe {
namespace channel {
namespace basic {

// Instantiates a basic-channel context, wrapped in the generic context
// boilerplate and exposed through the public channel::Context interface.
std::shared_ptr<Context> create() {
  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>();
}

} // namespace basic
} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/basic/factory.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <memory>

#include <tensorpipe/channel/context.h>

namespace tensorpipe {
namespace channel {
namespace basic {

// Creates a context for the "basic" channel, which transfers CPU buffers over
// the pipe's transport connection.
std::shared_ptr<Context> create();

} // namespace basic
} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/channel.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <functional>
#include <string>

#include <tensorpipe/channel/context.h>
#include <tensorpipe/common/buffer.h>
#include <tensorpipe/common/error.h>

// Channels are an out of band mechanism to transfer data between
// processes. Examples include a direct address space to address space
// memory copy on the same machine, or a GPU-to-GPU memory copy.
//
// Construction of a channel happens as follows.
//
//   1) During initialization of a pipe, the connecting peer sends its
//      list of channel contexts and their device descriptors. The
//      device descriptor is used to determine whether or not a
//      channel can be used by a pair of peers.
//   2) The listening side of the pipe compares the list it received
//      with its own list to determine the list of channels that should
//      be used for the peers.
//   3) For every channel that should be constructed, the listening
//      side registers a slot with its low level listener. These slots
//      uniquely identify inbound connections on this listener (by
//      sending a word-sized identifier immediately after connecting)
//      and can be used to construct new connections. These slots are
//      sent to the connecting side of the pipe, which then attempts
//      to establish a new connection for every token.
//   4) At this time, we have a new control connection for every
//      channel that is about to be constructed. Both sides of the
//      pipe can now create the channel instance using the newly
//      created connection. Further initialization that needs to
//      happen is deferred to the channel implementation. We assume the
//      channel is usable from the moment it is constructed.
//
namespace tensorpipe {
namespace channel {

// Completion callbacks for send/recv operations; invoked with a set Error if
// the operation failed or the channel was in an error state.
using TSendCallback = std::function<void(const Error&)>;
using TRecvCallback = std::function<void(const Error&)>;

// Abstract base class for channel classes.
class Channel {
 public:
  // Send memory region to peer.
  virtual void send(Buffer buffer, size_t length, TSendCallback callback) = 0;

  // Receive memory region from peer.
  virtual void recv(Buffer buffer, size_t length, TRecvCallback callback) = 0;

  // Tell the channel what its identifier is.
  //
  // This is only supposed to be called from the high-level pipe. It will only
  // be used for logging and debugging purposes.
  virtual void setId(std::string id) = 0;

  // Put the channel in a terminal state, aborting pending operations and
  // rejecting future ones, and release its resources. This may be carried out
  // asynchronously, in background.
  virtual void close() = 0;

  virtual ~Channel() = default;
};

} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/channel_boilerplate.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <cstddef>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>

#include <tensorpipe/channel/channel.h>
#include <tensorpipe/channel/channel_impl_boilerplate.h>

namespace tensorpipe {
namespace channel {

// Public-facing wrapper around a channel implementation.
//
// Implements the Channel interface by forwarding every call to a
// shared_ptr-held implementation object (TChan). Holding the impl in a
// shared_ptr detaches its lifetime from this wrapper's, allowing its
// destruction to be carried out asynchronously. A null impl_ marks a
// non-viable channel: send/recv fail with ContextNotViableError and
// setId/close become no-ops.
template <typename TCtx, typename TChan>
class ChannelBoilerplate : public Channel {
 public:
  // Construct a fresh implementation (forwarding extra args to TChan)
  // and initialize it. The token restricts who may call this.
  template <typename... Args>
  ChannelBoilerplate(
      typename ChannelImplBoilerplate<TCtx, TChan>::ConstructorToken token,
      std::shared_ptr<TCtx> context,
      std::string id,
      Args&&... args);

  // Wrap an already-constructed implementation (init() is not invoked).
  explicit ChannelBoilerplate(std::shared_ptr<TChan> channel);

  ChannelBoilerplate(const ChannelBoilerplate&) = delete;
  ChannelBoilerplate(ChannelBoilerplate&&) = delete;
  ChannelBoilerplate& operator=(const ChannelBoilerplate&) = delete;
  ChannelBoilerplate& operator=(ChannelBoilerplate&&) = delete;

  // Perform a send operation.
  void send(Buffer buffer, size_t length, TSendCallback callback) override;

  // Queue a recv operation.
  void recv(Buffer buffer, size_t length, TRecvCallback callback) override;

  // Tell the connection what its identifier is.
  void setId(std::string id) override;

  // Shut down the connection and its resources.
  void close() override;

  ~ChannelBoilerplate() override;

 protected:
  // Using a shared_ptr allows us to detach the lifetime of the implementation
  // from the public object's one and perform the destruction asynchronously.
  const std::shared_ptr<TChan> impl_;

// Construct the implementation and kick off its initialization.
//
// init() is called here (rather than inside TChan's constructor) because
// the implementation needs shared_from_this, which is not available until
// the object is owned by a shared_ptr.
template <typename TCtx, typename TChan>
template <typename... Args>
ChannelBoilerplate<TCtx, TChan>::ChannelBoilerplate(
    typename ChannelImplBoilerplate<TCtx, TChan>::ConstructorToken token,
    std::shared_ptr<TCtx> context,
    std::string id,
    Args&&... args)
    : impl_(std::make_shared<TChan>(
          token,
          std::move(context),
          std::move(id),
          std::forward<Args>(args)...)) {
  static_assert(
      std::is_base_of<ChannelImplBoilerplate<TCtx, TChan>, TChan>::value, "");
  impl_->init();
}

// Wrap an already-constructed implementation. Unlike the other
// constructor, init() is not invoked here.
template <typename TCtx, typename TChan>
ChannelBoilerplate<TCtx, TChan>::ChannelBoilerplate(
    std::shared_ptr<TChan> channel)
    : impl_(std::move(channel)) {
  static_assert(
      std::is_base_of<ChannelImplBoilerplate<TCtx, TChan>, TChan>::value, "");
}

// Forward a send to the implementation; if the channel is non-viable
// (no impl), fail the callback immediately with ContextNotViableError.
template <typename TCtx, typename TChan>
void ChannelBoilerplate<TCtx, TChan>::send(
    Buffer buffer,
    size_t length,
    TSendCallback callback) {
  if (impl_) {
    impl_->send(buffer, length, std::move(callback));
    return;
  }
  // FIXME In C++-17 perhaps a global static inline variable would be better?
  static Error error = TP_CREATE_ERROR(ContextNotViableError);
  callback(error);
}

// Forward a recv to the implementation; if the channel is non-viable
// (no impl), fail the callback immediately with ContextNotViableError.
template <typename TCtx, typename TChan>
void ChannelBoilerplate<TCtx, TChan>::recv(
    Buffer buffer,
    size_t length,
    TRecvCallback callback) {
  if (impl_) {
    impl_->recv(buffer, length, std::move(callback));
    return;
  }
  // FIXME In C++-17 perhaps a global static inline variable would be better?
  static Error error = TP_CREATE_ERROR(ContextNotViableError);
  callback(error);
}

// Forward the identifier to the implementation; a no-op for a
// non-viable channel.
template <typename TCtx, typename TChan>
void ChannelBoilerplate<TCtx, TChan>::setId(std::string id) {
  if (impl_) {
    impl_->setId(std::move(id));
  }
}

// Forward close to the implementation; a no-op for a non-viable channel.
template <typename TCtx, typename TChan>
void ChannelBoilerplate<TCtx, TChan>::close() {
  if (impl_) {
    impl_->close();
  }
}

// Closing on destruction aborts pending operations; the implementation
// itself is shared_ptr-held and may be torn down asynchronously after
// this wrapper is gone.
template <typename TCtx, typename TChan>
ChannelBoilerplate<TCtx, TChan>::~ChannelBoilerplate() {
  close();
}

} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/channel_impl_boilerplate.h
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#pragma once

#include <memory>
#include <string>
#include <utility>

#include <tensorpipe/channel/channel.h>
#include <tensorpipe/channel/error.h>
#include <tensorpipe/common/callback.h>
#include <tensorpipe/common/defs.h>
#include <tensorpipe/common/error.h>
#include <tensorpipe/common/error_macros.h>

namespace tensorpipe {
namespace channel {

template <typename TCtx, typename TChan>
class ContextImplBoilerplate;

// Shared scaffolding for channel implementations.
//
// Centralizes the machinery common to all channels: deferring every
// public entry point onto the context's event loop, assigning sequence
// numbers to send/recv operations, verbose logging, and sticky error
// handling. Subclasses supply the transport-specific behavior through
// the *Impl hooks.
template <typename TCtx, typename TChan>
class ChannelImplBoilerplate : public std::enable_shared_from_this<TChan> {
 public:
  // Passkey idiom: only ContextImplBoilerplate (a friend) can mint a
  // token, hence only it can construct channel implementations.
  class ConstructorToken {
   public:
    ConstructorToken(const ConstructorToken&) = default;

   private:
    explicit ConstructorToken() {}
    friend ContextImplBoilerplate<TCtx, TChan>;
  };

  ChannelImplBoilerplate(
      ConstructorToken token,
      std::shared_ptr<TCtx> context,
      std::string id);

  ChannelImplBoilerplate(const ChannelImplBoilerplate&) = delete;
  ChannelImplBoilerplate(ChannelImplBoilerplate&&) = delete;
  ChannelImplBoilerplate& operator=(const ChannelImplBoilerplate&) = delete;
  ChannelImplBoilerplate& operator=(ChannelImplBoilerplate&&) = delete;

  // Initialize member fields that need `shared_from_this`.
  void init();

  // Perform a send operation.
  void send(Buffer buffer, size_t length, TSendCallback callback);

  // Queue a recv operation.
  void recv(Buffer buffer, size_t length, TRecvCallback callback);

  // Tell the connection what its identifier is.
  void setId(std::string id);

  // Shut down the connection and its resources.
  void close();

  virtual ~ChannelImplBoilerplate() = default;

 protected:
  // Subclass hooks. All of them are invoked from the context's loop.
  virtual void initImplFromLoop() = 0;
  virtual void sendImplFromLoop(
      uint64_t sequenceNumber,
      Buffer buffer,
      size_t length,
      TSendCallback callback) = 0;
  virtual void recvImplFromLoop(
      uint64_t sequenceNumber,
      Buffer buffer,
      size_t length,
      TRecvCallback callback) = 0;
  virtual void handleErrorImpl() = 0;
  virtual void setIdImpl() {}

  // Record the first error and trigger handleError; later calls are
  // ignored (the error is sticky).
  void setError(Error error);

  const std::shared_ptr<TCtx> context_;

  // Sticky error state: once set, it is never overwritten (see setError).
  Error error_{Error::kSuccess};

  // An identifier for the connection, composed of the identifier for the
  // context or listener, combined with an increasing sequence number. It will
  // only be used for logging and debugging purposes.
  std::string id_;

  CallbackWrapper<TChan> callbackWrapper_{*this, *this->context_};

 private:
  // Initialize member fields that need `shared_from_this`.
  void initFromLoop();

  // Perform a send operation.
  void sendFromLoop(Buffer buffer, size_t length, TSendCallback callback);

  // Queue a recv operation.
  void recvFromLoop(Buffer buffer, size_t length, TRecvCallback callback);

  void setIdFromLoop(std::string id);

  // Shut down the connection and its resources.
  void closeFromLoop();

  // Deal with an error.
  void handleError();

  // A sequence number for the calls to send and recv.
  uint64_t nextTensorBeingSent_{0};
  uint64_t nextTensorBeingReceived_{0};

  // For some odd reason it seems we need to use a qualified name here...
  template <typename T>
  friend class tensorpipe::CallbackWrapper;

  // Contexts do sometimes need to call directly into closeFromLoop, in order to
  // make sure that some of their operations can happen "atomically" on the
  // connection, without possibly other operations occurring in between (e.g.,
  // an error).
  friend ContextImplBoilerplate<TCtx, TChan>;
};

// The token is accepted (and discarded) purely to restrict construction
// to ContextImplBoilerplate; see ConstructorToken.
template <typename TCtx, typename TChan>
ChannelImplBoilerplate<TCtx, TChan>::ChannelImplBoilerplate(
    ConstructorToken /* unused */,
    std::shared_ptr<TCtx> context,
    std::string id)
    : context_(std::move(context)), id_(std::move(id)) {}

// Schedule initialization on the context's loop; the captured
// shared_ptr keeps this object alive until the deferred call runs.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::init() {
  auto self = this->shared_from_this();
  context_->deferToLoop([self]() { self->initFromLoop(); });
}

// Loop-side part of init(). If the context has already been closed, mark
// the channel as errored without running the subclass's initialization.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::initFromLoop() {
  if (context_->closed()) {
    // Set the error without calling setError because we do not want to invoke
    // the subclass's handleErrorImpl as it would find itself in a weird state
    // (since initFromLoop wouldn't have been called).
    error_ = TP_CREATE_ERROR(ChannelClosedError);
    TP_VLOG(4) << "Channel " << id_ << " is closing (without initing)";
    return;
  }

  initImplFromLoop();
}

// Defer the send onto the context's loop, keeping the object alive via
// shared_from_this for the duration of the deferred call.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::send(
    Buffer buffer,
    size_t length,
    TSendCallback callback) {
  auto self = this->shared_from_this();
  context_->deferToLoop(
      [self, buffer, length, fn{std::move(callback)}]() mutable {
        self->sendFromLoop(buffer, length, std::move(fn));
      });
}

// Loop-side part of send(): assign the next send sequence number, wrap
// the user callback so that its invocation is logged, and either fail it
// immediately (sticky error) or hand off to the subclass.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::sendFromLoop(
    Buffer buffer,
    size_t length,
    TSendCallback callback) {
  TP_DCHECK(context_->inLoop());

  const uint64_t sequenceNumber = nextTensorBeingSent_++;
  TP_VLOG(4) << "Channel " << id_ << " received a send request (#"
             << sequenceNumber << ")";

  // NOTE(review): the wrapper captures `this` raw; presumably safe
  // because the callback fires on the loop while the impl is alive —
  // confirm.
  callback = [this, sequenceNumber, callback{std::move(callback)}](
                 const Error& error) {
    // There is no requirement for the channel to invoke callbacks in order.
    TP_VLOG(4) << "Channel " << id_ << " is calling a send callback (#"
               << sequenceNumber << ")";
    callback(error);
    TP_VLOG(4) << "Channel " << id_ << " done calling a send callback (#"
               << sequenceNumber << ")";
  };

  if (error_) {
    callback(error_);
    return;
  }

  sendImplFromLoop(sequenceNumber, buffer, length, std::move(callback));
}

// Defer the recv onto the context's loop, keeping the object alive via
// shared_from_this for the duration of the deferred call.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::recv(
    Buffer buffer,
    size_t length,
    TRecvCallback callback) {
  auto self = this->shared_from_this();
  context_->deferToLoop(
      [self, buffer, length, fn{std::move(callback)}]() mutable {
        self->recvFromLoop(buffer, length, std::move(fn));
      });
}

// Loop-side part of recv(): assign the next recv sequence number, wrap
// the user callback so that its invocation is logged, and either fail it
// immediately (sticky error) or hand off to the subclass.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::recvFromLoop(
    Buffer buffer,
    size_t length,
    TRecvCallback callback) {
  TP_DCHECK(context_->inLoop());

  const uint64_t sequenceNumber = nextTensorBeingReceived_++;
  TP_VLOG(4) << "Channel " << id_ << " received a recv request (#"
             << sequenceNumber << ")";

  // NOTE(review): the wrapper captures `this` raw; presumably safe
  // because the callback fires on the loop while the impl is alive —
  // confirm.
  callback = [this, sequenceNumber, callback{std::move(callback)}](
                 const Error& error) {
    // There is no requirement for the channel to invoke callbacks in order.
    TP_VLOG(4) << "Channel " << id_ << " is calling a recv callback (#"
               << sequenceNumber << ")";
    callback(error);
    TP_VLOG(4) << "Channel " << id_ << " done calling a recv callback (#"
               << sequenceNumber << ")";
  };

  if (error_) {
    callback(error_);
    return;
  }

  recvImplFromLoop(sequenceNumber, buffer, length, std::move(callback));
}

// Defer the rename onto the context's loop.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::setId(std::string id) {
  auto self = this->shared_from_this();
  context_->deferToLoop([self, newId{std::move(id)}]() mutable {
    self->setIdFromLoop(std::move(newId));
  });
}

// Loop-side part of setId(): log the rename, store the new identifier,
// and let the subclass react (e.g., to propagate it) via setIdImpl.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::setIdFromLoop(std::string id) {
  TP_DCHECK(context_->inLoop());
  TP_VLOG(4) << "Channel " << id_ << " was renamed to " << id;
  id_ = std::move(id);
  setIdImpl();
}

// Defer the close onto the context's loop.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::close() {
  auto self = this->shared_from_this();
  context_->deferToLoop([self]() { self->closeFromLoop(); });
}

// Closing is implemented as setting a ChannelClosedError: pending and
// future operations will fail with it via the sticky-error path.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::closeFromLoop() {
  TP_DCHECK(context_->inLoop());
  TP_VLOG(4) << "Channel " << id_ << " is closing";
  setError(TP_CREATE_ERROR(ChannelClosedError));
}

// Record the first error and kick off error handling. Once an error is
// set it sticks: subsequent errors (and non-errors) are ignored.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::setError(Error error) {
  // Only act on a real error, and only if none was recorded before.
  if (!error_ && error) {
    error_ = std::move(error);
    handleError();
  }
}

// Runs exactly once, when the first error is recorded by setError;
// delegates the actual cleanup to the subclass.
template <typename TCtx, typename TChan>
void ChannelImplBoilerplate<TCtx, TChan>::handleError() {
  TP_DCHECK(context_->inLoop());
  TP_VLOG(5) << "Channel " << id_ << " is handling error " << error_.what();

  handleErrorImpl();
}

} // namespace channel
} // namespace tensorpipe


================================================
FILE: tensorpipe/channel/cma/channel_impl.cc
================================================
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <tensorpipe/channel/cma/channel_impl.h>

#include <memory>
#include <string>
#include <utility>

#include <nop/serializer.h>
#include <nop/structure.h>

#include <tensorpipe/channel/cma/context_impl.h>
#include <tensorpipe/common/cpu_buffer.h>
#include <tensorpipe/common/defs.h>
#include <tensorpipe/common/error.h>
#include <tensorpipe/transport/connection.h>

namespace tensorpipe {
namespace channel {
namespace cma {

namespace {

// On-the-wire descriptor sent from the sender to the receiver: the
// sender's process ID and the raw address of the source buffer, which
// the receiver feeds into ContextImpl::requestCopy to pull the data
// directly out of the sender's address space.
struct Descriptor {
  uint32_t pid;
  uint64_t ptr;
  NOP_STRUCTURE(Descriptor, pid, ptr);
};

} // namespace

// The channel uses two dedicated control connections: one carrying
// descriptors (written by the sender, read by the receiver) and one
// carrying completion notifications (written by the receiver, read by
// the sender).
ChannelImpl::ChannelImpl(
    ConstructorToken token,
    std::shared_ptr<ContextImpl> context,
    std::string id,
    std::shared_ptr<transport::Connection> descriptorConnection,
    std::shared_ptr<transport::Connection> completionConnection)
    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(
          token,
          std::move(context),
          std::move(id)),
      descriptorConnection_(std::move(descriptorConnection)),
      completionConnection_(std::move(completionConnection)) {}

// Register this channel with its context; presumably the context tracks
// live channels (see ContextImpl::enroll) — confirm there.
void ChannelImpl::initImplFromLoop() {
  context_->enroll(*this);
}

void ChannelImpl::sendImplFromLoop(
    uint64_t sequenceNumber,
    Buffer buffer,
    size_t length,
    TSendCallback callback) {
  SendOpIter opIter = sendOps_.emplaceBack(sequenceNumber);
  SendOperation& op = *opIter;
  op.callback = std::move(callback);
  op.ptr = buffer.unwrap<CpuBuffer>().ptr;
  op.length = length;

  sendOps_.advanceOperation(opIter);
}

// Drive the send state machine:
//   UNINITIALIZED -> FINISHED            (early-out on error or empty buffer)
//   UNINITIALIZED -> READING_COMPLETION  (write descriptor, await peer)
//   READING_COMPLETION -> FINISHED       (peer signalled completion)
// prevOpState gates transitions that must preserve per-connection call order.
void ChannelImpl::advanceSendOperation(
    SendOpIter opIter,
    SendOperation::State prevOpState) {
  TP_DCHECK(context_->inLoop());

  SendOperation& op = *opIter;

  sendOps_.attemptTransition(
      opIter,
      /*from=*/SendOperation::UNINITIALIZED,
      /*to=*/SendOperation::FINISHED,
      /*cond=*/error_ || op.length == 0,
      /*actions=*/{&ChannelImpl::callSendCallback});

  // Needs to go after previous op to ensure predictable and consistent ordering
  // of write calls on the descriptor control connection and read calls on the
  // completion control connection.
  sendOps_.attemptTransition(
      opIter,
      /*from=*/SendOperation::UNINITIALIZED,
      /*to=*/SendOperation::READING_COMPLETION,
      /*cond=*/!error_ && prevOpState >= SendOperation::READING_COMPLETION,
      /*actions=*/
      {&ChannelImpl::writeDescriptor, &ChannelImpl::readCompletion});

  sendOps_.attemptTransition(
      opIter,
      /*from=*/SendOperation::READING_COMPLETION,
      /*to=*/SendOperation::FINISHED,
      /*cond=*/op.doneReadingCompletion,
      /*actions=*/{&ChannelImpl::callSendCallback});
}

// Send our PID and the source buffer's address to the receiver, which
// performs the actual cross-process copy out of our address space (see
// readDescriptor/copy on the receiving side).
void ChannelImpl::writeDescriptor(SendOpIter opIter) {
  SendOperation& op = *opIter;

  auto nopHolder = std::make_shared<NopHolder<Descriptor>>();
  Descriptor& nopDescriptor = nopHolder->getObject();
  // TODO: Store the PID upon channel/context instantiation.
  nopDescriptor.pid = ::getpid();
  nopDescriptor.ptr = reinterpret_cast<uint64_t>(op.ptr);

  TP_VLOG(6) << "Channel " << id_ << " is writing descriptor (#"
             << op.sequenceNumber << ")";
  descriptorConnection_->write(
      *nopHolder,
      // nopHolder is captured to keep the serialized payload alive until
      // the write completes.
      callbackWrapper_([sequenceNumber{op.sequenceNumber},
                        nopHolder](ChannelImpl& impl) {
        TP_VLOG(6) << "Channel " << impl.id_ << " done writing descriptor (#"
                   << sequenceNumber << ")";
      }));
}

// Wait for the receiver's completion notification: a zero-length read on
// the completion connection that acts purely as a signal that the peer
// has finished copying, after which the send can finish.
void ChannelImpl::readCompletion(SendOpIter opIter) {
  SendOperation& op = *opIter;

  TP_VLOG(6) << "Channel " << id_ << " is reading completion (#"
             << op.sequenceNumber << ")";
  completionConnection_->read(
      nullptr,
      0,
      callbackWrapper_([opIter](
                           ChannelImpl& impl,
                           const void* /* unused */,
                           size_t /* unused */) {
        TP_VLOG(6) << "Channel " << impl.id_ << " done reading completion (#"
                   << opIter->sequenceNumber << ")";
        opIter->doneReadingCompletion = true;
        impl.sendOps_.advanceOperation(opIter);
      }));
}

// Invoke the operation's user callback with the current (possibly
// success) error state, then drop the callback so any resources it
// captured are released.
void ChannelImpl::callSendCallback(SendOpIter opIter) {
  opIter->callback(error_);
  // Reset callback to release the resources it was holding.
  opIter->callback = nullptr;
}

void ChannelImpl::recvImplFromLoop(
    uint64_t sequenceNumber,
    Buffer buffer,
    size_t length,
    TRecvCallback callback) {
  RecvOpIter opIter = recvOps_.emplaceBack(sequenceNumber);
  RecvOperation& op = *opIter;
  op.ptr = buffer.unwrap<CpuBuffer>().ptr;
  op.length = length;
  op.callback = std::move(callback);

  recvOps_.advanceOperation(opIter);
}

// Drive the recv state machine:
//   UNINITIALIZED -> FINISHED            (early-out on error or empty buffer)
//   UNINITIALIZED -> READING_DESCRIPTOR  (await sender's pid/ptr)
//   READING_DESCRIPTOR -> FINISHED       (error surfaced mid-flight)
//   READING_DESCRIPTOR -> COPYING        (pull data from the peer process)
//   COPYING -> FINISHED                  (on error, or after notifying peer)
// prevOpState gates transitions that must preserve per-connection call order.
void ChannelImpl::advanceRecvOperation(
    RecvOpIter opIter,
    RecvOperation::State prevOpState) {
  TP_DCHECK(context_->inLoop());

  RecvOperation& op = *opIter;

  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::UNINITIALIZED,
      /*to=*/RecvOperation::FINISHED,
      /*cond=*/error_ || op.length == 0,
      /*actions=*/{&ChannelImpl::callRecvCallback});

  // Needs to go after previous op to ensure predictable and consistent ordering
  // of read calls on the descriptor control connection.
  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::UNINITIALIZED,
      /*to=*/RecvOperation::READING_DESCRIPTOR,
      /*cond=*/!error_ && prevOpState >= RecvOperation::READING_DESCRIPTOR,
      /*actions=*/{&ChannelImpl::readDescriptor});

  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::READING_DESCRIPTOR,
      /*to=*/RecvOperation::FINISHED,
      /*cond=*/error_ && op.doneReadingDescriptor,
      /*actions=*/{&ChannelImpl::callRecvCallback});

  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::READING_DESCRIPTOR,
      /*to=*/RecvOperation::COPYING,
      /*cond=*/!error_ && op.doneReadingDescriptor,
      /*actions=*/{&ChannelImpl::copy});

  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::COPYING,
      /*to=*/RecvOperation::FINISHED,
      /*cond=*/error_ && op.doneCopying,
      /*actions=*/{&ChannelImpl::callRecvCallback});

  // Needs to go after previous op to ensure predictable and consistent ordering
  // of write calls on the completion control connection.
  recvOps_.attemptTransition(
      opIter,
      /*from=*/RecvOperation::COPYING,
      /*to=*/RecvOperation::FINISHED,
      /*cond=*/!error_ && op.doneCopying &&
          prevOpState >= RecvOperation::FINISHED,
      /*actions=*/
      {&ChannelImpl::callRecvCallback, &ChannelImpl::writeCompletion});
}

// Read the sender's descriptor (its pid and the source buffer address)
// from the descriptor connection and stash them on the operation so the
// subsequent copy step can use them.
void ChannelImpl::readDescriptor(RecvOpIter opIter) {
  RecvOperation& op = *opIter;

  TP_VLOG(6) << "Channel " << id_ << " is reading descriptor (#"
             << op.sequenceNumber << ")";
  auto nopHolderIn = std::make_shared<NopHolder<Descriptor>>();
  descriptorConnection_->read(
      *nopHolderIn, callbackWrapper_([opIter, nopHolderIn](ChannelImpl& impl) {
        TP_VLOG(6) << "Channel " << impl.id_ << " done reading descriptor (#"
                   << opIter->sequenceNumber << ")";
        opIter->doneReadingDescriptor = true;
        // On error the descriptor contents are meaningless; skip them.
        if (!impl.error_) {
          Descriptor& nopDescriptor = nopHolderIn->getObject();
          opIter->remotePid = nopDescriptor.pid;
          opIter->remotePtr = reinterpret_cast<void*>(nopDescriptor.ptr);
        }
        impl.recvOps_.advanceOperation(opIter);
      }));
}

// Ask the context to copy op.length bytes from the remote process's
// address space (remotePid/remotePtr, as announced in the descriptor)
// into the local destination buffer.
void ChannelImpl::copy(RecvOpIter opIter) {
  RecvOperation& op = *opIter;

  TP_VLOG(6) << "Channel " << id_ << " is copying payload (#"
             << op.sequenceNumber << ")";
  context_->requestCopy(
      op.remotePid,
      op.remotePtr,
      op.ptr,
      op.length,
      callbackWrapper_([opIter](ChannelImpl& impl) {
        TP_VLOG(6) << "Channel " << impl.id_ << " done copying payload (#"
                   << opIter->sequenceNumber << ")";
        opIter->doneCopying = true;
        impl.recvOps_.advanceOperation(opIter);
      }));
}

void C
Download .txt
gitextract_wzzfsv6c/

├── .circleci/
│   ├── Dockerfile.cuda10.1
│   ├── Dockerfile.cuda10.2
│   ├── Dockerfile.cuda11.0
│   ├── Dockerfile.cuda11.1
│   ├── Dockerfile.cuda9.2
│   └── config.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE.txt
├── README.md
├── cmake/
│   ├── FindPackageHandleStandardArgs.cmake
│   ├── FindPackageMessage.cmake
│   ├── Finduv.cmake
│   ├── MiscCheck.cmake
│   ├── Options.cmake
│   └── Sanitize.cmake
├── docs/
│   ├── cuda_gotchas.md
│   ├── development.md
│   ├── linux_support.md
│   ├── shm.md
│   └── thread_model.md
├── setup.py
├── tensorpipe/
│   ├── .clang-format
│   ├── .clang-tidy
│   ├── CMakeLists.txt
│   ├── benchmark/
│   │   ├── CMakeLists.txt
│   │   ├── benchmark_pipe.cc
│   │   ├── benchmark_transport.cc
│   │   ├── channel_registry.cc
│   │   ├── channel_registry.h
│   │   ├── measurements.h
│   │   ├── options.cc
│   │   ├── options.h
│   │   ├── registry.h
│   │   ├── transport_registry.cc
│   │   └── transport_registry.h
│   ├── channel/
│   │   ├── basic/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── channel.h
│   │   ├── channel_boilerplate.h
│   │   ├── channel_impl_boilerplate.h
│   │   ├── cma/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── context.h
│   │   ├── context_boilerplate.h
│   │   ├── context_impl_boilerplate.h
│   │   ├── cuda_basic/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── constants.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── cuda_gdr/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── constants.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── error.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── cuda_ipc/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── constants.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── cuda_xth/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   └── factory.h
│   │   ├── error.cc
│   │   ├── error.h
│   │   ├── helpers.cc
│   │   ├── helpers.h
│   │   ├── mpt/
│   │   │   ├── channel_impl.cc
│   │   │   ├── channel_impl.h
│   │   │   ├── context_impl.cc
│   │   │   ├── context_impl.h
│   │   │   ├── factory.cc
│   │   │   ├── factory.h
│   │   │   └── nop_types.h
│   │   └── xth/
│   │       ├── channel_impl.cc
│   │       ├── channel_impl.h
│   │       ├── context_impl.cc
│   │       ├── context_impl.h
│   │       ├── factory.cc
│   │       └── factory.h
│   ├── common/
│   │   ├── address.cc
│   │   ├── address.h
│   │   ├── allocator.cc
│   │   ├── allocator.h
│   │   ├── buffer.h
│   │   ├── busy_polling_loop.h
│   │   ├── callback.h
│   │   ├── cpu_buffer.h
│   │   ├── cuda.h
│   │   ├── cuda_buffer.cc
│   │   ├── cuda_buffer.h
│   │   ├── cuda_lib.h
│   │   ├── cuda_loop.cc
│   │   ├── cuda_loop.h
│   │   ├── deferred_executor.h
│   │   ├── defs.h
│   │   ├── device.h
│   │   ├── dl.h
│   │   ├── epoll_loop.cc
│   │   ├── epoll_loop.h
│   │   ├── error.cc
│   │   ├── error.h
│   │   ├── error_macros.h
│   │   ├── fd.cc
│   │   ├── fd.h
│   │   ├── ibv.cc
│   │   ├── ibv.h
│   │   ├── ibv_lib.h
│   │   ├── memory.h
│   │   ├── nop.h
│   │   ├── nvml_lib.h
│   │   ├── optional.h
│   │   ├── queue.h
│   │   ├── ringbuffer.h
│   │   ├── ringbuffer_read_write_ops.h
│   │   ├── ringbuffer_role.h
│   │   ├── shm_ringbuffer.h
│   │   ├── shm_segment.cc
│   │   ├── shm_segment.h
│   │   ├── socket.cc
│   │   ├── socket.h
│   │   ├── state_machine.h
│   │   ├── stream_read_write_ops.h
│   │   ├── strings.h
│   │   ├── system.cc
│   │   └── system.h
│   ├── config.h.in
│   ├── config_cuda.h.in
│   ├── core/
│   │   ├── context.cc
│   │   ├── context.h
│   │   ├── context_impl.cc
│   │   ├── context_impl.h
│   │   ├── error.cc
│   │   ├── error.h
│   │   ├── listener.cc
│   │   ├── listener.h
│   │   ├── listener_impl.cc
│   │   ├── listener_impl.h
│   │   ├── message.h
│   │   ├── nop_types.h
│   │   ├── pipe.cc
│   │   ├── pipe.h
│   │   ├── pipe_impl.cc
│   │   └── pipe_impl.h
│   ├── misc/
│   │   ├── CMakeLists.txt
│   │   └── dump_state_machine.cc
│   ├── python/
│   │   ├── CMakeLists.txt
│   │   └── tensorpipe.cc
│   ├── tensorpipe.h
│   ├── tensorpipe_cuda.h
│   ├── test/
│   │   ├── CMakeLists.txt
│   │   ├── channel/
│   │   │   ├── basic/
│   │   │   │   └── basic_test.cc
│   │   │   ├── channel_test.cc
│   │   │   ├── channel_test.h
│   │   │   ├── channel_test_cpu.cc
│   │   │   ├── channel_test_cpu.h
│   │   │   ├── channel_test_cuda.cc
│   │   │   ├── channel_test_cuda.h
│   │   │   ├── channel_test_cuda_multi_gpu.cc
│   │   │   ├── channel_test_cuda_xdtt.cc
│   │   │   ├── cma/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── cma_test.cc
│   │   │   │   ├── docker_tests.sh
│   │   │   │   ├── probe.cc
│   │   │   │   └── probe_report_checker.py
│   │   │   ├── cuda_basic/
│   │   │   │   └── cuda_basic_test.cc
│   │   │   ├── cuda_gdr/
│   │   │   │   └── cuda_gdr_test.cc
│   │   │   ├── cuda_helpers.h
│   │   │   ├── cuda_ipc/
│   │   │   │   └── cuda_ipc_test.cc
│   │   │   ├── cuda_xth/
│   │   │   │   └── cuda_xth_test.cc
│   │   │   ├── kernel.cu
│   │   │   ├── kernel.cuh
│   │   │   ├── mpt/
│   │   │   │   └── mpt_test.cc
│   │   │   └── xth/
│   │   │       └── xth_test.cc
│   │   ├── common/
│   │   │   ├── cuda_test.cc
│   │   │   ├── defs_test.cc
│   │   │   ├── epoll_loop_test.cc
│   │   │   ├── ringbuffer_test.cc
│   │   │   ├── shm_ringbuffer_test.cc
│   │   │   ├── shm_segment_test.cc
│   │   │   └── system_test.cc
│   │   ├── core/
│   │   │   ├── context_test.cc
│   │   │   ├── listener_test.cc
│   │   │   ├── pipe_cuda_test.cc
│   │   │   ├── pipe_test.cc
│   │   │   └── pipe_test.h
│   │   ├── peer_group.h
│   │   ├── python/
│   │   │   └── tensorpipe.py
│   │   ├── test.cc
│   │   ├── test_environment.cc
│   │   ├── test_environment.h
│   │   └── transport/
│   │       ├── connection_test.cc
│   │       ├── context_test.cc
│   │       ├── ibv/
│   │       │   ├── connection_test.cc
│   │       │   ├── context_test.cc
│   │       │   ├── ibv_test.cc
│   │       │   ├── ibv_test.h
│   │       │   └── sockaddr_test.cc
│   │       ├── listener_test.cc
│   │       ├── shm/
│   │       │   ├── connection_test.cc
│   │       │   ├── listener_test.cc
│   │       │   ├── reactor_test.cc
│   │       │   ├── shm_test.cc
│   │       │   ├── shm_test.h
│   │       │   └── sockaddr_test.cc
│   │       ├── transport_test.h
│   │       └── uv/
│   │           ├── connection_test.cc
│   │           ├── context_test.cc
│   │           ├── loop_test.cc
│   │           ├── sockaddr_test.cc
│   │           ├── uv_test.cc
│   │           └── uv_test.h
│   └── transport/
│       ├── connection.h
│       ├── connection_boilerplate.h
│       ├── connection_impl_boilerplate.h
│       ├── context.h
│       ├── context_boilerplate.h
│       ├── context_impl_boilerplate.h
│       ├── error.cc
│       ├── error.h
│       ├── ibv/
│       │   ├── connection_impl.cc
│       │   ├── connection_impl.h
│       │   ├── constants.h
│       │   ├── context_impl.cc
│       │   ├── context_impl.h
│       │   ├── error.cc
│       │   ├── error.h
│       │   ├── factory.cc
│       │   ├── factory.h
│       │   ├── listener_impl.cc
│       │   ├── listener_impl.h
│       │   ├── reactor.cc
│       │   ├── reactor.h
│       │   ├── sockaddr.cc
│       │   ├── sockaddr.h
│       │   ├── utility.cc
│       │   └── utility.h
│       ├── listener.h
│       ├── listener_boilerplate.h
│       ├── listener_impl_boilerplate.h
│       ├── shm/
│       │   ├── connection_impl.cc
│       │   ├── connection_impl.h
│       │   ├── context_impl.cc
│       │   ├── context_impl.h
│       │   ├── factory.cc
│       │   ├── factory.h
│       │   ├── listener_impl.cc
│       │   ├── listener_impl.h
│       │   ├── reactor.cc
│       │   ├── reactor.h
│       │   ├── sockaddr.cc
│       │   └── sockaddr.h
│       └── uv/
│           ├── connection_impl.cc
│           ├── connection_impl.h
│           ├── context_impl.cc
│           ├── context_impl.h
│           ├── error.cc
│           ├── error.h
│           ├── factory.cc
│           ├── factory.h
│           ├── listener_impl.cc
│           ├── listener_impl.h
│           ├── loop.cc
│           ├── loop.h
│           ├── sockaddr.cc
│           ├── sockaddr.h
│           ├── utility.cc
│           ├── utility.h
│           └── uv.h
└── third_party/
    └── README.md
Download .txt
SYMBOL INDEX (883 symbols across 242 files)

FILE: setup.py
  class CMakeBuild (line 17) | class CMakeBuild(build_ext):
    method run (line 18) | def run(self):
    method build_extension (line 22) | def build_extension(self, ext):

FILE: tensorpipe/benchmark/benchmark_pipe.cc
  type NcclCommDeleter (line 37) | struct NcclCommDeleter {
  function NcclComm (line 46) | static NcclComm createNcclComm(int rank, int worldSize, ncclUniqueId uni...
  type CudaMemoryDeleter (line 61) | struct CudaMemoryDeleter {
  type CudaStreamDeleter (line 67) | struct CudaStreamDeleter {
  type Data (line 77) | struct Data {
  type MultiDeviceMeasurements (line 102) | struct MultiDeviceMeasurements {
  function printMeasurements (line 109) | static void printMeasurements(Measurements& measurements, size_t dataLen) {
  function printMultiDeviceMeasurements (line 133) | static void printMultiDeviceMeasurements(
  function createEmptyCpuData (line 140) | static std::unique_ptr<uint8_t[]> createEmptyCpuData(size_t size) {
  function createFullCpuData (line 144) | static std::unique_ptr<uint8_t[]> createFullCpuData(size_t size) {
  function CudaTensor (line 153) | static CudaTensor createEmptyCudaData(size_t size) {
  function CudaTensor (line 159) | static CudaTensor createFullCudaData(size_t size) {
  function CudaStream (line 167) | static CudaStream createCudaStream() {
  function serverPongPingNonBlock (line 173) | static void serverPongPingNonBlock(
  function runServer (line 345) | static void runServer(const Options& options) {
  function clientPingPongNonBlock (line 422) | static void clientPingPongNonBlock(
  function runClient (line 639) | static void runClient(const Options& options) {
  function main (line 715) | int main(int argc, char** argv) {

FILE: tensorpipe/benchmark/benchmark_transport.cc
  type Data (line 24) | struct Data {
  function printMeasurements (line 30) | static void printMeasurements(Measurements& measurements, size_t dataLen) {
  function createData (line 54) | static std::unique_ptr<uint8_t[]> createData(const int size) {
  function serverPongPingNonBlock (line 63) | static void serverPongPingNonBlock(
  function runServer (line 94) | static void runServer(const Options& options) {
  function clientPingPongNonBlock (line 124) | static void clientPingPongNonBlock(
  function runClient (line 158) | static void runClient(const Options& options) {
  function main (line 181) | int main(int argc, char** argv) {

FILE: tensorpipe/benchmark/channel_registry.cc
  function makeBasicChannel (line 20) | std::shared_ptr<tensorpipe::channel::Context> makeBasicChannel() {
  function makeCmaChannel (line 29) | std::shared_ptr<tensorpipe::channel::Context> makeCmaChannel() {
  function makeMptChannel (line 38) | std::shared_ptr<tensorpipe::channel::Context> makeMptChannel() {
  function makeXthChannel (line 46) | std::shared_ptr<tensorpipe::channel::Context> makeXthChannel() {
  function makeCudaXthChannel (line 54) | std::shared_ptr<tensorpipe::channel::Context> makeCudaXthChannel() {
  function makeCudaBasicChannel (line 62) | std::shared_ptr<tensorpipe::channel::Context> makeCudaBasicChannel() {
  function makeCudaIpcChannel (line 75) | std::shared_ptr<tensorpipe::channel::Context> makeCudaIpcChannel() {
  function makeCudaGdrChannel (line 85) | std::shared_ptr<tensorpipe::channel::Context> makeCudaGdrChannel() {
  function validateChannelContext (line 92) | void validateChannelContext(

FILE: tensorpipe/benchmark/measurements.h
  function namespace (line 15) | namespace tensorpipe {
  function nanoseconds (line 51) | nanoseconds percentile(float f) const {

FILE: tensorpipe/benchmark/options.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type benchmark (line 16) | namespace benchmark {
      function usage (line 18) | static void usage(int status, const char* argv0) {
      function validateOptions (line 43) | static void validateOptions(Options options, const char* argv0) {
      function parseOptions (line 66) | struct Options parseOptions(int argc, char** argv) {

FILE: tensorpipe/benchmark/options.h
  function namespace (line 16) | namespace tensorpipe {

FILE: tensorpipe/benchmark/registry.h
  function namespace (line 30) | namespace tensorpipe {

FILE: tensorpipe/benchmark/transport_registry.cc
  function makeIbvContext (line 20) | std::shared_ptr<tensorpipe::transport::Context> makeIbvContext() {
  function makeShmContext (line 30) | std::shared_ptr<tensorpipe::transport::Context> makeShmContext() {
  function makeUvContext (line 39) | std::shared_ptr<tensorpipe::transport::Context> makeUvContext() {
  function validateTransportContext (line 45) | void validateTransportContext(

FILE: tensorpipe/channel/basic/channel_impl.cc
  type tensorpipe (line 21) | namespace tensorpipe {
    type channel (line 22) | namespace channel {
      type basic (line 23) | namespace basic {

FILE: tensorpipe/channel/basic/channel_impl.h
  function namespace (line 20) | namespace basic {

FILE: tensorpipe/channel/basic/context_impl.cc
  type tensorpipe (line 16) | namespace tensorpipe {
    type channel (line 17) | namespace channel {
      type basic (line 18) | namespace basic {

FILE: tensorpipe/channel/basic/context_impl.h
  function namespace (line 17) | namespace tensorpipe {

FILE: tensorpipe/channel/basic/factory.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type channel (line 16) | namespace channel {
      type basic (line 17) | namespace basic {
        function create (line 19) | std::shared_ptr<Context> create() {

FILE: tensorpipe/channel/basic/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/channel/channel.h
  function namespace (line 45) | namespace tensorpipe {

FILE: tensorpipe/channel/channel_boilerplate.h
  function namespace (line 20) | namespace tensorpipe {

FILE: tensorpipe/channel/channel_impl_boilerplate.h
  function namespace (line 22) | namespace tensorpipe {

FILE: tensorpipe/channel/cma/channel_impl.cc
  type tensorpipe (line 24) | namespace tensorpipe {
    type channel (line 25) | namespace channel {
      type cma (line 26) | namespace cma {
        type Descriptor (line 30) | struct Descriptor {

FILE: tensorpipe/channel/cma/channel_impl.h
  type SendOperation (line 24) | struct SendOperation {
  function State (line 29) | State state{UNINITIALIZED};

FILE: tensorpipe/channel/cma/context_impl.cc
  type tensorpipe (line 29) | namespace tensorpipe {
    type channel (line 30) | namespace channel {
      type cma (line 31) | namespace cma {
        function Error (line 39) | Error callProcessVmReadv(
        class BadReadError (line 70) | class BadReadError final : public BaseError {
          method BadReadError (line 72) | BadReadError(uint64_t expected, uint64_t actual)
          method what (line 75) | std::string what() const override {
        function Error (line 91) | Error attemptProcessVmReadvSyscallOnSelf() {
        function Error (line 111) | Error performCopy(
        function Error (line 350) | const Error& error) {

FILE: tensorpipe/channel/cma/context_impl.h
  function namespace (line 22) | namespace tensorpipe {

FILE: tensorpipe/channel/cma/factory.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type channel (line 16) | namespace channel {
      type cma (line 17) | namespace cma {
        function create (line 19) | std::shared_ptr<Context> create() {

FILE: tensorpipe/channel/cma/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/channel/context.h
  function namespace (line 19) | namespace tensorpipe {

FILE: tensorpipe/channel/context_boilerplate.h
  function namespace (line 20) | namespace tensorpipe {

FILE: tensorpipe/channel/context_impl_boilerplate.h
  function namespace (line 22) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_basic/channel_impl.cc
  type tensorpipe (line 27) | namespace tensorpipe {
    type channel (line 28) | namespace channel {
      type cuda_basic (line 29) | namespace cuda_basic {
        function ceilOfRatio (line 33) | size_t ceilOfRatio(size_t n, size_t d) {

FILE: tensorpipe/channel/cuda_basic/channel_impl.h
  type ChunkSendOperation (line 28) | struct ChunkSendOperation {
  function bufferSequenceNumber (line 43) | uint64_t bufferSequenceNumber{0}
  function chunkId (line 46) | size_t chunkId{0}
  function numChunks (line 47) | size_t numChunks{0}
  function length (line 48) | size_t length{0}
  function deviceIdx (line 53) | int deviceIdx{0}
  function doneSendingCpuBuffer (line 61) | bool doneSendingCpuBuffer{false};
  type ChunkRecvOperation (line 64) | struct ChunkRecvOperation {
  function chunkId (line 83) | size_t chunkId{0}
  function numChunks (line 84) | size_t numChunks{0}
  function length (line 85) | size_t length{0}
  function doneReadingReadyToSend (line 96) | bool doneReadingReadyToSend{false};

FILE: tensorpipe/channel/cuda_basic/constants.h
  function namespace (line 13) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_basic/context_impl.cc
  type tensorpipe (line 21) | namespace tensorpipe {
    type channel (line 22) | namespace channel {
      type cuda_basic (line 23) | namespace cuda_basic {
        type DeviceDescriptor (line 27) | struct DeviceDescriptor {
        function DeviceDescriptor (line 33) | DeviceDescriptor deserializeDeviceDescriptor(
        function CudaLib (line 130) | const CudaLib& ContextImpl::getCudaLib() {
        function Allocator (line 134) | Allocator& ContextImpl::getCudaHostSendAllocator(int deviceIdx) {
        function Allocator (line 145) | Allocator& ContextImpl::getCudaHostRecvAllocator(int deviceIdx) {

FILE: tensorpipe/channel/cuda_basic/context_impl.h
  function namespace (line 21) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_basic/factory.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type channel (line 16) | namespace channel {
      type cuda_basic (line 17) | namespace cuda_basic {
        function create (line 19) | std::shared_ptr<Context> create(std::shared_ptr<Context> cpuContex...

FILE: tensorpipe/channel/cuda_basic/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_gdr/channel_impl.cc
  type tensorpipe (line 24) | namespace tensorpipe {
    type channel (line 25) | namespace channel {
      type cuda_gdr (line 26) | namespace cuda_gdr {
        function ceilOfRatio (line 30) | size_t ceilOfRatio(size_t n, size_t d) {

FILE: tensorpipe/channel/cuda_gdr/channel_impl.h
  function namespace (line 27) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_gdr/constants.h
  function namespace (line 13) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_gdr/context_impl.cc
  type tensorpipe (line 36) | namespace tensorpipe {
    type channel (line 37) | namespace channel {
      type cuda_gdr (line 38) | namespace cuda_gdr {
        function applyFuncImpl (line 45) | auto applyFuncImpl(
        function applyFunc (line 54) | auto applyFunc(IbvNic& subject, TMethod&& method, TArgsTuple&& arg...
        function isNvidiaPeerMemoryClientActive (line 93) | bool isNvidiaPeerMemoryClientActive() {
        function getPciPathForIbvNic (line 112) | std::string getPciPathForIbvNic(const std::string& nicName) {
        function getPciPathForGpu (line 125) | std::string getPciPathForGpu(int gpuIdx) {
        function commonPrefixLength (line 157) | size_t commonPrefixLength(const std::string& a, const std::string&...
        function matchGpusToIbvNics (line 169) | std::vector<std::string> matchGpusToIbvNics(
        function getBar1SizeOfGpu (line 236) | size_t getBar1SizeOfGpu(int gpuIdx) {
        function allGpusHaveEnoughBar1Size (line 247) | bool allGpusHaveEnoughBar1Size() {
        function IbvMemoryRegion (line 412) | IbvMemoryRegion& IbvNic::registerMemory(CudaBuffer buffer) {
        function CudaLib (line 583) | const CudaLib& ContextImpl::getCudaLib() {
        function IbvLib (line 591) | const IbvLib& ContextImpl::getIbvLib() {
        function IbvNic (line 595) | IbvNic& ContextImpl::getIbvNic(size_t nicIdx) {

FILE: tensorpipe/channel/cuda_gdr/context_impl.h
  function namespace (line 33) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_gdr/error.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_gdr/factory.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type channel (line 16) | namespace channel {
      type cuda_gdr (line 17) | namespace cuda_gdr {
        function create (line 19) | std::shared_ptr<Context> create(

FILE: tensorpipe/channel/cuda_gdr/factory.h
  function namespace (line 17) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_ipc/channel_impl.cc
  type tensorpipe (line 29) | namespace tensorpipe {
    type channel (line 30) | namespace channel {
      type cuda_ipc (line 31) | namespace cuda_ipc {
        function ceilOfRatio (line 41) | size_t ceilOfRatio(size_t n, size_t d) {
        type Descriptor (line 45) | struct Descriptor {

FILE: tensorpipe/channel/cuda_ipc/channel_impl.h
  type ChunkSendOperation (line 31) | struct ChunkSendOperation {
  function CudaEvent (line 60) | CudaEvent* event{nullptr};
  type ChunkRecvOperation (line 73) | struct ChunkRecvOperation {
  function State (line 78) | State state{UNINITIALIZED};

FILE: tensorpipe/channel/cuda_ipc/constants.h
  function namespace (line 13) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_ipc/context_impl.cc
  type tensorpipe (line 38) | namespace tensorpipe {
    type channel (line 39) | namespace channel {
      type cuda_ipc (line 40) | namespace cuda_ipc {
        function getGlobalUuidsAndP2pSupport (line 44) | std::tuple<std::vector<std::string>, std::vector<std::vector<bool>>>
        function globalIdxForDevice (line 98) | int globalIdxForDevice(
        type DeviceDescriptor (line 108) | struct DeviceDescriptor {
        function DeviceDescriptor (line 115) | DeviceDescriptor deserializeDeviceDescriptor(
        function generateBootId (line 122) | std::string generateBootId() {
        function createIpcEventArray (line 132) | std::unique_ptr<optional<CudaEvent>[]> createIpcEventArray(
        function getIpcHandlesForEventArray (line 161) | std::vector<cudaIpcEventHandle_t> getIpcHandlesForEventArray(
        function CudaLib (line 337) | const CudaLib& ContextImpl::getCudaLib() {

FILE: tensorpipe/channel/cuda_ipc/context_impl.h
  function namespace (line 25) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_ipc/factory.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type channel (line 16) | namespace channel {
      type cuda_ipc (line 17) | namespace cuda_ipc {
        function create (line 19) | std::shared_ptr<Context> create() {

FILE: tensorpipe/channel/cuda_ipc/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_xth/channel_impl.cc
  type tensorpipe (line 24) | namespace tensorpipe {
    type channel (line 25) | namespace channel {
      type cuda_xth (line 26) | namespace cuda_xth {
        type Descriptor (line 30) | struct Descriptor {

FILE: tensorpipe/channel/cuda_xth/channel_impl.h
  function namespace (line 22) | namespace cuda_xth {

FILE: tensorpipe/channel/cuda_xth/context_impl.cc
  type tensorpipe (line 24) | namespace tensorpipe {
    type channel (line 25) | namespace channel {
      type cuda_xth (line 26) | namespace cuda_xth {
        function CudaLib (line 94) | const CudaLib& ContextImpl::getCudaLib() {

FILE: tensorpipe/channel/cuda_xth/context_impl.h
  function namespace (line 16) | namespace tensorpipe {

FILE: tensorpipe/channel/cuda_xth/factory.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type channel (line 16) | namespace channel {
      type cuda_xth (line 17) | namespace cuda_xth {
        function create (line 19) | std::shared_ptr<Context> create() {

FILE: tensorpipe/channel/cuda_xth/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/channel/error.cc
  type tensorpipe (line 14) | namespace tensorpipe {
    type channel (line 15) | namespace channel {

FILE: tensorpipe/channel/error.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/channel/helpers.cc
  type tensorpipe (line 16) | namespace tensorpipe {
    type channel (line 17) | namespace channel {
      function saveDescriptor (line 19) | std::string saveDescriptor(const AbstractNopHolder& object) {
      function loadDescriptor (line 32) | void loadDescriptor(AbstractNopHolder& object, const std::string& in) {

FILE: tensorpipe/channel/helpers.h
  function namespace (line 17) | namespace tensorpipe {

FILE: tensorpipe/channel/mpt/channel_impl.cc
  type tensorpipe (line 21) | namespace tensorpipe {
    type channel (line 22) | namespace channel {
      type mpt (line 23) | namespace mpt {

FILE: tensorpipe/channel/mpt/channel_impl.h
  function namespace (line 22) | namespace tensorpipe {

FILE: tensorpipe/channel/mpt/context_impl.cc
  type tensorpipe (line 24) | namespace tensorpipe {
    type channel (line 25) | namespace channel {
      type mpt (line 26) | namespace mpt {
        function generateDomainDescriptor (line 30) | std::string generateDomainDescriptor(

FILE: tensorpipe/channel/mpt/context_impl.h
  function namespace (line 26) | namespace tensorpipe {

FILE: tensorpipe/channel/mpt/factory.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type channel (line 16) | namespace channel {
      type mpt (line 17) | namespace mpt {
        function create (line 19) | std::shared_ptr<Context> create(

FILE: tensorpipe/channel/mpt/factory.h
  function namespace (line 17) | namespace tensorpipe {

FILE: tensorpipe/channel/mpt/nop_types.h
  function namespace (line 18) | namespace tensorpipe {

FILE: tensorpipe/channel/xth/channel_impl.cc
  type tensorpipe (line 24) | namespace tensorpipe {
    type channel (line 25) | namespace channel {
      type xth (line 26) | namespace xth {
        type Descriptor (line 30) | struct Descriptor {

FILE: tensorpipe/channel/xth/channel_impl.h
  type SendOperation (line 24) | struct SendOperation {
  function State (line 29) | State state{UNINITIALIZED};

FILE: tensorpipe/channel/xth/context_impl.cc
  type tensorpipe (line 25) | namespace tensorpipe {
    type channel (line 26) | namespace channel {
      type xth (line 27) | namespace xth {
        function Error (line 93) | const Error& error) {

FILE: tensorpipe/channel/xth/context_impl.h
  function namespace (line 22) | namespace tensorpipe {

FILE: tensorpipe/channel/xth/factory.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type channel (line 16) | namespace channel {
      type xth (line 17) | namespace xth {
        function create (line 19) | std::shared_ptr<Context> create() {

FILE: tensorpipe/channel/xth/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/common/address.cc
  type tensorpipe (line 13) | namespace tensorpipe {
    function splitSchemeOfURL (line 15) | std::tuple<std::string, std::string> splitSchemeOfURL(const std::strin...

FILE: tensorpipe/common/address.h
  function namespace (line 13) | namespace tensorpipe {

FILE: tensorpipe/common/allocator.cc
  type tensorpipe (line 14) | namespace tensorpipe {

FILE: tensorpipe/common/allocator.h
  function namespace (line 20) | namespace tensorpipe {

FILE: tensorpipe/common/buffer.h
  function namespace (line 19) | namespace tensorpipe {

FILE: tensorpipe/common/busy_polling_loop.h
  function stopBusyPolling (line 27) | void stopBusyPolling() {
  function eventLoop (line 32) | void eventLoop() override {
  function wakeupEventLoopToDeferFunction (line 44) | void wakeupEventLoopToDeferFunction() override {

FILE: tensorpipe/common/callback.h
  function TStoredArgs (line 56) | TStoredArgs args{std::move(args_.front())};
  function TFn (line 66) | TFn fn{std::move(callbacks_.front())};
  function triggerAll (line 77) | void triggerAll(std::function<std::tuple<Args...>()> generator) {

FILE: tensorpipe/common/cpu_buffer.h
  function namespace (line 13) | namespace tensorpipe {

FILE: tensorpipe/common/cuda.h
  function namespace (line 35) | namespace tensorpipe {
  function cudaDeviceForPointer (line 141) | inline int cudaDeviceForPointer(const CudaLib& cudaLib, const void* ptr) {
  function class (line 168) | class CudaPinnedMemoryDeleter {
  function CudaPinnedBuffer (line 183) | inline CudaPinnedBuffer makeCudaPinnedBuffer(size_t length, int deviceId...
  function class (line 190) | class CudaDeviceBuffer {
  function reset (line 209) | void reset() {
  function cudaIpcMemHandle_t (line 213) | cudaIpcMemHandle_t getIpcHandle() const {
  function reset (line 253) | void reset() {
  type Deleter (line 258) | struct Deleter {
  function std (line 270) | inline std::string getUuidOfDevice(const CudaLib& cudaLib, int deviceIdx) {
  function std (line 299) | inline std::vector<std::string> getUuidsOfVisibleDevices(
  function std (line 312) | inline std::vector<Device> getCudaDevices(const CudaLib& cudaLib) {

FILE: tensorpipe/common/cuda_buffer.cc
  type tensorpipe (line 14) | namespace tensorpipe {
    function Device (line 16) | Device CudaBuffer::getDevice() const {

FILE: tensorpipe/common/cuda_buffer.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/common/cuda_lib.h
  function namespace (line 34) | namespace tensorpipe {

FILE: tensorpipe/common/cuda_loop.cc
  type tensorpipe (line 14) | namespace tensorpipe {
    type CudaCallback (line 18) | struct CudaCallback {
      method CudaCallback (line 22) | CudaCallback(CudaLoop& loop, std::function<void(const Error&)> callb...
    class CudaLoopClosedError (line 26) | class CudaLoopClosedError final : public BaseError {
      method what (line 27) | std::string what() const override {

FILE: tensorpipe/common/cuda_loop.h
  type Operation (line 26) | struct Operation {
  function pendingOperations_ (line 49) | uint64_t pendingOperations_{0}
  function closed_ (line 51) | bool closed_{false};

FILE: tensorpipe/common/deferred_executor.h
  function namespace (line 26) | namespace tensorpipe {
  function isThreadConsumingDeferredFunctions_ (line 270) | bool isThreadConsumingDeferredFunctions_{false};

FILE: tensorpipe/common/defs.h
  function namespace (line 33) | namespace tensorpipe {
  function namespace (line 113) | namespace tensorpipe {
  function namespace (line 132) | namespace tensorpipe {
  function namespace (line 271) | namespace tensorpipe {

FILE: tensorpipe/common/device.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/common/dl.h
  function namespace (line 25) | namespace tensorpipe {

FILE: tensorpipe/common/epoll_loop.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type epoll_event (line 32) | struct epoll_event
    type epoll_event (line 77) | struct epoll_event
    type epoll_event (line 138) | struct epoll_event
    type epoll_event (line 174) | struct epoll_event

FILE: tensorpipe/common/epoll_loop.h
  function nextRecord_ (line 133) | uint64_t nextRecord_{1}

FILE: tensorpipe/common/error.cc
  type tensorpipe (line 16) | namespace tensorpipe {

FILE: tensorpipe/common/error.h
  function namespace (line 14) | namespace tensorpipe {

FILE: tensorpipe/common/fd.cc
  type tensorpipe (line 17) | namespace tensorpipe {
    function Error (line 45) | Error Fd::readFull(void* buf, size_t count) {
    function Error (line 57) | Error Fd::writeFull(const void* buf, size_t count) {

FILE: tensorpipe/common/fd.h
  function namespace (line 18) | namespace tensorpipe {

FILE: tensorpipe/common/ibv.cc
  type tensorpipe (line 14) | namespace tensorpipe {
    function ibvWorkCompletionOpcodeToStr (line 16) | std::string ibvWorkCompletionOpcodeToStr(IbvLib::wc_opcode opcode) {
    function makeIbvAddress (line 39) | struct IbvAddress makeIbvAddress(
    function makeIbvSetupInformation (line 63) | struct IbvSetupInformation makeIbvSetupInformation(
    function transitionIbvQueuePairToInit (line 78) | void transitionIbvQueuePairToInit(
    function transitionIbvQueuePairToReadyToReceive (line 105) | void transitionIbvQueuePairToReadyToReceive(
    function transitionIbvQueuePairToReadyToSend (line 152) | void transitionIbvQueuePairToReadyToSend(
    function transitionIbvQueuePairToError (line 182) | void transitionIbvQueuePairToError(const IbvLib& ibvLib, IbvQueuePair&...

FILE: tensorpipe/common/ibv.h
  function namespace (line 16) | namespace tensorpipe {
  type IbvContextDeleter (line 95) | struct IbvContextDeleter {
  function IbvContext (line 105) | inline IbvContext createIbvContext(
  type IbvProtectionDomainDeleter (line 113) | struct IbvProtectionDomainDeleter {
  function IbvProtectionDomain (line 124) | inline IbvProtectionDomain createIbvProtectionDomain(
  type IbvCompletionQueueDeleter (line 132) | struct IbvCompletionQueueDeleter {
  function IbvCompletionQueue (line 143) | inline IbvCompletionQueue createIbvCompletionQueue(
  type IbvSharedReceiveQueueDeleter (line 156) | struct IbvSharedReceiveQueueDeleter {
  function IbvSharedReceiveQueue (line 167) | inline IbvSharedReceiveQueue createIbvSharedReceiveQueue(
  type IbvMemoryRegionDeleter (line 176) | struct IbvMemoryRegionDeleter {
  function IbvMemoryRegion (line 186) | inline IbvMemoryRegion createIbvMemoryRegion(
  type IbvQueuePairDeleter (line 197) | struct IbvQueuePairDeleter {
  function IbvQueuePair (line 207) | inline IbvQueuePair createIbvQueuePair(
  type IbvAddress (line 218) | struct IbvAddress {
  type IbvSetupInformation (line 229) | struct IbvSetupInformation {
  type IbvAddress (line 237) | struct IbvAddress
  type IbvSetupInformation (line 243) | struct IbvSetupInformation

FILE: tensorpipe/common/ibv_lib.h
  function namespace (line 16) | namespace tensorpipe {

FILE: tensorpipe/common/memory.h
  function namespace (line 20) | namespace tensorpipe {

FILE: tensorpipe/common/nop.h
  function namespace (line 19) | namespace tensorpipe {
  function class (line 129) | class NopWriter final {
  function class (line 208) | class AbstractNopHolder {
  function override (line 231) | const override {
  function namespace (line 245) | namespace nop {

FILE: tensorpipe/common/nvml_lib.h
  function namespace (line 28) | namespace tensorpipe {

FILE: tensorpipe/common/optional.h
  function namespace (line 5) | namespace tensorpipe {

FILE: tensorpipe/common/queue.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/common/ringbuffer.h
  function namespace (line 43) | namespace tensorpipe {

FILE: tensorpipe/common/ringbuffer_read_write_ops.h
  type Mode (line 33) | enum Mode {
  function AbstractNopHolder (line 63) | AbstractNopHolder* nopObject_{nullptr};
  function class (line 84) | class RingbufferWriteOperation {

FILE: tensorpipe/common/ringbuffer_role.h
  function namespace (line 18) | namespace tensorpipe {

FILE: tensorpipe/common/shm_ringbuffer.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/common/shm_segment.cc
  type tensorpipe (line 26) | namespace tensorpipe {
    function createMemfd (line 49) | std::tuple<Error, Fd> createMemfd() {
    function openTmpfileInDevShm (line 82) | std::tuple<Error, Fd> openTmpfileInDevShm() {
    function createShmFd (line 99) | std::tuple<Error, Fd> createShmFd() {
    function mmapShmFd (line 110) | std::tuple<Error, MmappedPtr> mmapShmFd(int fd, size_t byteSize) {
    type stat (line 149) | struct stat

FILE: tensorpipe/common/shm_segment.h
  function namespace (line 26) | namespace tensorpipe {

FILE: tensorpipe/common/socket.cc
  type tensorpipe (line 23) | namespace tensorpipe {
    function Error (line 43) | Error Socket::block(bool on) {
    function Error (line 63) | Error Socket::reuseAddr(bool on) {
    function Error (line 72) | Error Socket::bind(const Sockaddr& addr) {
    function Error (line 80) | Error Socket::listen(int backlog) {
    type sockaddr_storage (line 89) | struct sockaddr_storage
    type sockaddr (line 93) | struct sockaddr
    function Error (line 106) | Error Socket::connect(const Sockaddr& addr) {
    type sockaddr_storage (line 122) | struct sockaddr_storage
    type sockaddr_storage (line 124) | struct sockaddr_storage

FILE: tensorpipe/common/socket.h
  function namespace (line 23) | namespace tensorpipe {
  function Error (line 117) | [[nodiscard]] Error sendFdsToSocket(int socketFd, const Fds&... fds) {
  function Error (line 123) | [[nodiscard]] Error recvFromSocket(int socketFd, T& t1, T& t2, Fds&... f...
  type cmsghdr (line 164) | struct cmsghdr
  function Error (line 177) | [[nodiscard]] Error recvFdsFromSocket(int socketFd, Fds&... fds) {
  function class (line 182) | class Sockaddr {

FILE: tensorpipe/common/state_machine.h
  function advanceOperation (line 51) | void advanceOperation(Iter initialOpIter) {
  function advanceAllOperations (line 65) | void advanceAllOperations() {

FILE: tensorpipe/common/stream_read_write_ops.h
  function namespace (line 21) | namespace tensorpipe {

FILE: tensorpipe/common/strings.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/common/system.cc
  type tensorpipe (line 49) | namespace tensorpipe {
    function getBootIDInternal (line 54) | optional<std::string> getBootIDInternal() {
    function getBootIDInternal (line 71) | optional<std::string> getBootIDInternal() {
    function getPathForLinuxNamespace (line 83) | std::string getPathForLinuxNamespace(LinuxNamespace ns) {
    function tstampToStr (line 109) | std::string tstampToStr(TimeStamp ts) {
    function getProcFsStr (line 121) | optional<std::string> getProcFsStr(const std::string& fileName, pid_t ...
    function removeBlankSpaces (line 134) | std::string removeBlankSpaces(std::string s) {
    function getBootID (line 143) | optional<std::string> getBootID() {
    function getLinuxNamespaceId (line 154) | optional<std::string> getLinuxNamespaceId(LinuxNamespace ns) {
    function getLinuxNamespaceId (line 166) | optional<std::string> getLinuxNamespaceId(LinuxNamespace ns) {
    function getLinuxSecurityModules (line 205) | optional<std::vector<std::string>> getLinuxSecurityModules() {
    function getYamaPtraceScope (line 226) | optional<YamaPtraceScope> getYamaPtraceScope() {
    function getPermittedCapabilitiesID (line 252) | optional<std::string> getPermittedCapabilitiesID() {
    function setThreadName (line 284) | void setThreadName(std::string name) {

FILE: tensorpipe/common/system.h
  function isPow2 (line 49) | constexpr bool isPow2(uint64_t n) noexcept {
  function nextPow2 (line 54) | constexpr uint32_t nextPow2(uint32_t n) noexcept {
  function nextPow2 (line 67) | constexpr uint64_t nextPow2(uint64_t n) noexcept {
  function maxPow2LessEqualThan (line 81) | constexpr uint64_t maxPow2LessEqualThan(uint64_t n) noexcept {
  type class (line 91) | enum class
  function YamaPtraceScope (line 109) | enum class YamaPtraceScope {

FILE: tensorpipe/core/context.cc
  type tensorpipe (line 18) | namespace tensorpipe {

FILE: tensorpipe/core/context.h
  function namespace (line 20) | namespace tensorpipe {
  function class (line 42) | class PipeOptions {
  function class (line 58) | class Context final {

FILE: tensorpipe/core/context_impl.cc
  type tensorpipe (line 33) | namespace tensorpipe {
    function createContextId (line 39) | std::string createContextId() {

FILE: tensorpipe/core/context_impl.h
  function namespace (line 24) | namespace tensorpipe {

FILE: tensorpipe/core/error.cc
  type tensorpipe (line 13) | namespace tensorpipe {

FILE: tensorpipe/core/error.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/core/listener.cc
  type tensorpipe (line 19) | namespace tensorpipe {

FILE: tensorpipe/core/listener.h
  function namespace (line 19) | namespace tensorpipe {

FILE: tensorpipe/core/listener_impl.cc
  type tensorpipe (line 32) | namespace tensorpipe {

FILE: tensorpipe/core/listener_impl.h
  function namespace (line 28) | namespace tensorpipe {

FILE: tensorpipe/core/message.h
  function namespace (line 18) | namespace tensorpipe {
  function class (line 63) | class Descriptor final {
  type Tensor (line 73) | struct Tensor {
  function class (line 91) | class Allocation final {

FILE: tensorpipe/core/nop_types.h
  function namespace (line 24) | namespace tensorpipe {

FILE: tensorpipe/core/pipe.cc
  type tensorpipe (line 17) | namespace tensorpipe {

FILE: tensorpipe/core/pipe.h
  function namespace (line 19) | namespace tensorpipe {

FILE: tensorpipe/core/pipe_impl.cc
  type tensorpipe (line 26) | namespace tensorpipe {
    function parseDescriptorReplyOfMessage (line 30) | void parseDescriptorReplyOfMessage(
    function checkAllocationCompatibility (line 49) | void checkAllocationCompatibility(
    function makeDescriptorForMessage (line 70) | std::shared_ptr<NopHolder<Descriptor>> makeDescriptorForMessage(
    function makeDescriptorReplyForMessage (line 102) | std::shared_ptr<NopHolder<DescriptorReply>> makeDescriptorReplyForMess...
    type SelectedTransport (line 118) | struct SelectedTransport {
    function SelectedTransport (line 124) | SelectedTransport selectTransport(
    type SelectedChannels (line 159) | struct SelectedChannels {
    function SelectedChannels (line 166) | SelectedChannels selectChannels(
  function Error (line 417) | const Error& error) {
  function Error (line 555) | const Error& error) {

FILE: tensorpipe/core/pipe_impl.h
  type ReadOperation (line 37) | struct ReadOperation {
  function numPayloadsBeingRead (line 54) | uint64_t numPayloadsBeingRead{0}
  function numTensorsBeingReceived (line 55) | uint64_t numTensorsBeingReceived{0}
  function hasMissingTargetDevices (line 62) | bool hasMissingTargetDevices{false};
  type WriteOperation (line 69) | struct WriteOperation {
  function numPayloadsBeingWritten (line 83) | uint64_t numPayloadsBeingWritten{0}
  function numTensorsBeingSent (line 84) | uint64_t numTensorsBeingSent{0}
  function hasMissingTargetDevices (line 90) | bool hasMissingTargetDevices{false};
  type State (line 143) | enum State {
  type ConnectionId (line 168) | enum ConnectionId { DESCRIPTOR, DESCRIPTOR_REPLY }
  function nextMessageBeingRead_ (line 198) | uint64_t nextMessageBeingRead_{0}
  function nextMessageBeingWritten_ (line 199) | uint64_t nextMessageBeingWritten_{0}
  function nextReadDescriptorCallbackToCall_ (line 202) | uint64_t nextReadDescriptorCallbackToCall_{0}
  function nextReadCallbackToCall_ (line 203) | uint64_t nextReadCallbackToCall_{0}
  function nextWriteCallbackToCall_ (line 204) | uint64_t nextWriteCallbackToCall_{0}
  type ConnectionState (line 211) | enum ConnectionState { AWAITING_DESCRIPTOR, AWAITING_PAYLOADS }
  function ConnectionState (line 212) | ConnectionState connectionState_{AWAITING_DESCRIPTOR};

FILE: tensorpipe/misc/dump_state_machine.cc
  function exprToString (line 26) | std::string exprToString(const clang::Expr& e) {
  function cleanUp (line 35) | std::string cleanUp(const std::string& s) {
  function escape (line 42) | std::string escape(const std::string& s) {
  class MethodPrinter (line 52) | class MethodPrinter : public MatchFinder::MatchCallback {
    method addNode (line 55) | void addNode(const std::string& label) {
    method run (line 62) | void run(const MatchFinder::MatchResult& result) override {
  function main (line 112) | int main(int argc, const char* argv[]) {

FILE: tensorpipe/python/tensorpipe.cc
  class BufferWrapper (line 28) | class BufferWrapper {
    method BufferWrapper (line 30) | BufferWrapper(const py::buffer& buffer, int flags) {
    method BufferWrapper (line 36) | BufferWrapper(const BufferWrapper& other) = delete;
    method BufferWrapper (line 38) | BufferWrapper(BufferWrapper&& other) = delete;
    method BufferWrapper (line 40) | BufferWrapper& operator=(const BufferWrapper& other) = delete;
    method BufferWrapper (line 42) | BufferWrapper& operator=(BufferWrapper&& other) = delete;
    method length (line 52) | size_t length() const {
    method getBuffer (line 56) | py::buffer_info getBuffer() {
  class OutgoingPayload (line 70) | class OutgoingPayload {
    method OutgoingPayload (line 75) | OutgoingPayload(const py::buffer& buffer, const py::buffer& metadata)
  class OutgoingTensor (line 79) | class OutgoingTensor {
    method OutgoingTensor (line 84) | OutgoingTensor(const py::buffer& buffer, const py::buffer& metadata)
  class OutgoingMessage (line 88) | class OutgoingMessage {
    method OutgoingMessage (line 94) | OutgoingMessage(
  function prepareToWrite (line 103) | tensorpipe::Message prepareToWrite(std::shared_ptr<OutgoingMessage> pyMe...
  class IncomingPayload (line 132) | class IncomingPayload {
    method IncomingPayload (line 138) | IncomingPayload(size_t length, py::bytes metadata)
    method set_buffer (line 141) | void set_buffer(const py::buffer& pyBuffer) {
  class IncomingTensor (line 151) | class IncomingTensor {
    method IncomingTensor (line 157) | IncomingTensor(size_t length, py::bytes metadata)
    method set_buffer (line 160) | void set_buffer(const py::buffer& pyBuffer) {
  class IncomingMessage (line 170) | class IncomingMessage {
    method IncomingMessage (line 176) | IncomingMessage(
  function prepareToAllocate (line 183) | std::shared_ptr<IncomingMessage> prepareToAllocate(
  function prepareToRead (line 202) | tensorpipe::Allocation prepareToRead(
  function PYBIND11_MODULE (line 229) | PYBIND11_MODULE(pytensorpipe, module) {

FILE: tensorpipe/test/channel/basic/basic_test.cc
  class BasicChannelTestHelper (line 14) | class BasicChannelTestHelper : public CpuChannelTestHelper {
    method makeContextInternal (line 16) | std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(

FILE: tensorpipe/test/channel/channel_test.cc
  class DeviceDescriptorsTest (line 18) | class DeviceDescriptorsTest : public ChannelTestCase {
    method run (line 20) | void run(ChannelTestHelper* helper) override {
  class ClientToServerTest (line 45) | class ClientToServerTest : public ClientServerChannelTestCase {
    method server (line 49) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 64) | void client(std::shared_ptr<Channel> channel) override {
  class ServerToClientTest (line 86) | class ServerToClientTest : public ClientServerChannelTestCase {
    method server (line 90) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 109) | void client(std::shared_ptr<Channel> channel) override {
  class SendMultipleTensorsTest (line 127) | class SendMultipleTensorsTest : public ClientServerChannelTestCase {
    method server (line 134) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 157) | void client(std::shared_ptr<Channel> channel) override {
  class SendTensorsBothWaysTest (line 191) | class SendTensorsBothWaysTest : public ClientServerChannelTestCase {
    method server (line 194) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 226) | void client(std::shared_ptr<Channel> channel) override {
  class EmptyTensorTest (line 262) | class EmptyTensorTest : public ClientServerChannelTestCase {
    method server (line 263) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 278) | void client(std::shared_ptr<Channel> channel) override {
  class EmptyAndNonEmptyTensorsTest (line 297) | class EmptyAndNonEmptyTensorsTest : public ClientServerChannelTestCase {
    method server (line 298) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 317) | void client(std::shared_ptr<Channel> channel) override {

FILE: tensorpipe/test/channel/channel_test.h
  function class (line 30) | class DataWrapper {
  function virtual (line 278) | virtual void server(
  function virtual (line 280) | virtual void client(
  function virtual (line 283) | virtual void afterServer() {}
  function virtual (line 284) | virtual void afterClient() {}
  function class (line 291) | class ChannelTestSuite : public ::testing::TestWithParam<ChannelTestHelp...

FILE: tensorpipe/test/channel/channel_test_cpu.cc
  class NullPointerTest (line 19) | class NullPointerTest : public ClientServerChannelTestCase {
    method server (line 20) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 31) | void client(std::shared_ptr<Channel> channel) override {
  class CallbacksAreDeferredTest (line 51) | class CallbacksAreDeferredTest : public ClientServerChannelTestCase {
    method server (line 55) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 79) | void client(std::shared_ptr<Channel> channel) override {

FILE: tensorpipe/test/channel/channel_test_cpu.h
  function class (line 18) | class CpuDataWrapper : public DataWrapper {
  function class (line 40) | class CpuChannelTestHelper : public ChannelTestHelper {
  function class (line 52) | class CpuChannelTestSuite

FILE: tensorpipe/test/channel/channel_test_cuda.cc
  class ReceiverWaitsForStartEventTest (line 19) | class ReceiverWaitsForStartEventTest : public ClientServerChannelTestCase {
    method server (line 22) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 58) | void client(std::shared_ptr<Channel> channel) override {
  class SendOffsetAllocationTest (line 96) | class SendOffsetAllocationTest : public ClientServerChannelTestCase {
    method server (line 101) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 122) | void client(std::shared_ptr<Channel> channel) override {

FILE: tensorpipe/test/channel/channel_test_cuda.h
  function class (line 19) | class CudaDataWrapper : public DataWrapper {

FILE: tensorpipe/test/channel/channel_test_cuda_multi_gpu.cc
  class SendAcrossDevicesTest (line 19) | class SendAcrossDevicesTest : public ClientServerChannelTestCase {
    method run (line 23) | void run(ChannelTestHelper* helper) override {
    method server (line 32) | void server(std::shared_ptr<Channel> channel) override {
    method afterServer (line 73) | void afterServer() override {
    method client (line 81) | void client(std::shared_ptr<Channel> channel) override {
    method afterClient (line 123) | void afterClient() override {
  class SendReverseAcrossDevicesTest (line 134) | class SendReverseAcrossDevicesTest : public ClientServerChannelTestCase {
    method run (line 138) | void run(ChannelTestHelper* helper) override {
    method server (line 147) | void server(std::shared_ptr<Channel> channel) override {
    method afterServer (line 188) | void afterServer() override {
    method client (line 196) | void client(std::shared_ptr<Channel> channel) override {
    method afterClient (line 238) | void afterClient() override {
  class SendAcrossNonDefaultDevicesTest (line 249) | class SendAcrossNonDefaultDevicesTest : public ClientServerChannelTestCa...
    method run (line 253) | void run(ChannelTestHelper* helper) override {
    method server (line 262) | void server(std::shared_ptr<Channel> channel) override {
    method afterServer (line 303) | void afterServer() override {
    method client (line 307) | void client(std::shared_ptr<Channel> channel) override {
    method afterClient (line 349) | void afterClient() override {

FILE: tensorpipe/test/channel/channel_test_cuda_xdtt.cc
  class SendFromCpuToGpuTest (line 19) | class SendFromCpuToGpuTest : public ClientServerChannelTestCase {
    method server (line 22) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 44) | void client(std::shared_ptr<Channel> channel) override {
  class SendFromGpuToCpuTest (line 82) | class SendFromGpuToCpuTest : public ClientServerChannelTestCase {
    method server (line 85) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 118) | void client(std::shared_ptr<Channel> channel) override {
  class SendFromCpuToCpuTest (line 145) | class SendFromCpuToCpuTest : public ClientServerChannelTestCase {
    method server (line 148) | void server(std::shared_ptr<Channel> channel) override {
    method client (line 170) | void client(std::shared_ptr<Channel> channel) override {

FILE: tensorpipe/test/channel/cma/cma_test.cc
  class CmaChannelTestHelper (line 14) | class CmaChannelTestHelper : public CpuChannelTestHelper {
    method makeContextInternal (line 16) | std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(

FILE: tensorpipe/test/channel/cma/probe.cc
  function main (line 23) | int main(int argc, char* argv[]) {

FILE: tensorpipe/test/channel/cuda_basic/cuda_basic_test.cc
  class CudaBasicChannelTestHelper (line 17) | class CudaBasicChannelTestHelper : public CudaChannelTestHelper {
    method makeContextInternal (line 19) | std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(
    method makePeerGroup (line 29) | std::shared_ptr<PeerGroup> makePeerGroup() override {
  class CudaBasicChannelTestSuite (line 36) | class CudaBasicChannelTestSuite : public ChannelTestSuite {}
  class CannotCommunicateCpuToCpuTest (line 40) | class CannotCommunicateCpuToCpuTest : public ChannelTestCase {
    method run (line 42) | void run(ChannelTestHelper* /* unused */) override {

FILE: tensorpipe/test/channel/cuda_gdr/cuda_gdr_test.cc
  class CudaGdrChannelTestHelper (line 16) | class CudaGdrChannelTestHelper : public CudaChannelTestHelper {
    method makeContextInternal (line 18) | std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(
    method makePeerGroup (line 26) | std::shared_ptr<PeerGroup> makePeerGroup() override {

FILE: tensorpipe/test/channel/cuda_helpers.h
  function namespace (line 23) | namespace tensorpipe {
  function testing (line 50) | inline ::testing::AssertionResult initializedCudaContexts(

FILE: tensorpipe/test/channel/cuda_ipc/cuda_ipc_test.cc
  class CudaIpcChannelTestHelper (line 16) | class CudaIpcChannelTestHelper : public CudaChannelTestHelper {
    method makeContextInternal (line 18) | std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(
    method makePeerGroup (line 26) | std::shared_ptr<PeerGroup> makePeerGroup() override {
  class CudaIpcChannelTestSuite (line 33) | class CudaIpcChannelTestSuite : public ChannelTestSuite {}
  class CannotCommunicateInSameProcessTest (line 37) | class CannotCommunicateInSameProcessTest : public ChannelTestCase {
    method run (line 39) | void run(ChannelTestHelper* /* unused */) override {

FILE: tensorpipe/test/channel/cuda_xth/cuda_xth_test.cc
  class CudaXthChannelTestHelper (line 16) | class CudaXthChannelTestHelper : public CudaChannelTestHelper {
    method makeContextInternal (line 18) | std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(
    method makePeerGroup (line 26) | std::shared_ptr<PeerGroup> makePeerGroup() override {

FILE: tensorpipe/test/channel/mpt/mpt_test.cc
  class MptChannelTestHelper (line 17) | class MptChannelTestHelper : public CpuChannelTestHelper {
    method makeContextInternal (line 19) | std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(
  class MptChannelTestSuite (line 38) | class MptChannelTestSuite : public ChannelTestSuite {}
  class ContextIsNotJoinedTest (line 42) | class ContextIsNotJoinedTest : public ChannelTestCase {
    method run (line 48) | void run(ChannelTestHelper* helper) override {
    method server (line 86) | void server(std::shared_ptr<tensorpipe::transport::Connection> conn) {
    method client (line 94) | void client(std::shared_ptr<tensorpipe::transport::Connection> conn) {

FILE: tensorpipe/test/channel/xth/xth_test.cc
  class XthChannelTestHelper (line 14) | class XthChannelTestHelper : public CpuChannelTestHelper {
    method makeContextInternal (line 16) | std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(

FILE: tensorpipe/test/common/cuda_test.cc
  function getCudaLib (line 20) | tensorpipe::CudaLib getCudaLib() {
  function TEST (line 33) | TEST(Cuda, DeviceForPointer) {
  function TEST (line 64) | TEST(Cuda, DeviceForPointerAfterReset) {

FILE: tensorpipe/test/common/defs_test.cc
  function TEST (line 13) | TEST(Defs, Exception) {

FILE: tensorpipe/test/common/epoll_loop_test.cc
  class Handler (line 22) | class Handler : public EpollLoop::EventHandler {
    method handleEventsFromLoop (line 24) | void handleEventsFromLoop(int events) override {
    method nextEvents (line 30) | int nextEvents() {
  class FunctionEventHandler (line 50) | class FunctionEventHandler
  function createMonitor (line 121) | std::shared_ptr<FunctionEventHandler> createMonitor(
  function TEST (line 146) | TEST(ShmLoop, RegisterUnregister) {
  function TEST (line 174) | TEST(ShmLoop, Monitor) {
  function TEST (line 245) | TEST(ShmLoop, Defer) {

FILE: tensorpipe/test/common/ringbuffer_test.cc
  type TestData (line 18) | struct TestData {
  class RingBufferStorage (line 35) | class RingBufferStorage {
    method RingBufferStorage (line 37) | explicit RingBufferStorage(size_t size) : header_(size) {}
    method getRb (line 39) | RingBuffer<kNumRingbufferRoles> getRb() {
  function usedSize (line 49) | size_t usedSize(RingBuffer<kNumRingbufferRoles>& rb) {
  function TEST (line 54) | TEST(RingBuffer, WriteCopy) {
  function TEST (line 120) | TEST(RingBuffer, ReadMultipleElems) {
  function TEST (line 201) | TEST(RingBuffer, CopyWrapping) {
  function TEST (line 256) | TEST(RingBuffer, ReadTxWrappingOneCons) {
  function TEST (line 402) | TEST(RingBuffer, ReadTxWrapping) {
  function TEST (line 558) | TEST(RingBuffer, accessContiguousInTx) {

FILE: tensorpipe/test/common/shm_ringbuffer_test.cc
  function TEST (line 30) | TEST(ShmRingBuffer, SameProducerConsumer) {
  function TEST (line 82) | TEST(ShmRingBuffer, SingleProducer_SingleConsumer) {

FILE: tensorpipe/test/common/shm_segment_test.cc
  function TEST (line 22) | TEST(ShmSegment, SameProducerConsumer_Scalar) {
  function TEST (line 60) | TEST(ShmSegment, SingleProducer_SingleConsumer_Array) {

FILE: tensorpipe/test/common/system_test.cc
  function TEST (line 15) | TEST(Pow2, isPow2) {
  function TEST (line 31) | TEST(Pow2, nextPow2) {

FILE: tensorpipe/test/core/context_test.cc
  function buffersAreEqual (line 29) | ::testing::AssertionResult buffersAreEqual(
  function unwrapCudaBuffer (line 66) | std::vector<uint8_t> unwrapCudaBuffer(CudaBuffer b, size_t length) {
  function descriptorAndAllocationMatchMessage (line 75) | ::testing::AssertionResult descriptorAndAllocationMatchMessage(
  type CudaPointerDeleter (line 130) | struct CudaPointerDeleter {
  function makeCudaPointer (line 136) | std::unique_ptr<void, CudaPointerDeleter> makeCudaPointer(size_t length) {
  function makeTensor (line 155) | Message::Tensor makeTensor(int index) {
  function Message (line 187) | Message makeMessage(int numPayloads, int numTensors) {
  function Allocation (line 202) | Allocation allocateForDescriptor(
  function Message (line 243) | Message messageFromAllocation(
  function genUrls (line 267) | std::vector<std::string> genUrls() {
  function makeContext (line 278) | std::shared_ptr<Context> makeContext() {
  function TEST (line 303) | TEST(Context, ClientPingSerial) {
  function TEST (line 384) | TEST(Context, ClientPingInline) {

FILE: tensorpipe/test/core/listener_test.cc
  function TEST (line 21) | TEST(Listener, ClosingAbortsOperations) {

FILE: tensorpipe/test/core/pipe_cuda_test.cc
  class CudaSimpleWriteReadWithAllTargetDevicesTest (line 13) | class CudaSimpleWriteReadWithAllTargetDevicesTest
    method server (line 53) | void server(Pipe& pipe) override {
    method client (line 61) | void client(Pipe& pipe) override {
  function TEST (line 78) | TEST(Pipe, CudaSimpleWriteReadWithAllTargetDevices) {
  class CudaSimpleWriteReadWithSomeTargetDevicesTest (line 83) | class CudaSimpleWriteReadWithSomeTargetDevicesTest
    method server (line 115) | void server(Pipe& pipe) override {
    method client (line 123) | void client(Pipe& pipe) override {
  function TEST (line 139) | TEST(Pipe, CudaSimpleWriteReadWithSomeTargetDevices) {
  class CudaSimpleWriteReadWithoutTargetDeviceTest (line 144) | class CudaSimpleWriteReadWithoutTargetDeviceTest
    method server (line 180) | void server(Pipe& pipe) override {
    method client (line 188) | void client(Pipe& pipe) override {
  function TEST (line 205) | TEST(Pipe, CudaSimpleWriteReadWithoutTargetDevice) {

FILE: tensorpipe/test/core/pipe_test.cc
  class SimpleWriteReadTest (line 13) | class SimpleWriteReadTest : public ClientServerPipeTestCase {
    method server (line 43) | void server(Pipe& pipe) override {
    method client (line 51) | void client(Pipe& pipe) override {
  function TEST (line 67) | TEST(Pipe, SimpleWriteRead) {
  class SimpleWriteReadPayloadsOnlyTest (line 72) | class SimpleWriteReadPayloadsOnlyTest : public ClientServerPipeTestCase {
    method server (line 85) | void server(Pipe& pipe) override {
    method client (line 93) | void client(Pipe& pipe) override {
  function TEST (line 102) | TEST(Pipe, SimpleWriteReadPayloadsOnly) {
  class SimpleWriteReadTensorsOnlyTest (line 107) | class SimpleWriteReadTensorsOnlyTest : public ClientServerPipeTestCase {
    method server (line 132) | void server(Pipe& pipe) override {
    method client (line 140) | void client(Pipe& pipe) override {
  function TEST (line 156) | TEST(Pipe, SimpleWriteReadTensorsOnly) {
  class SimpleWriteReadWithAllTargetDevicesTest (line 161) | class SimpleWriteReadWithAllTargetDevicesTest
    method server (line 195) | void server(Pipe& pipe) override {
    method client (line 203) | void client(Pipe& pipe) override {
  function TEST (line 219) | TEST(Pipe, SimpleWriteReadWithAllTargetDevices) {
  class SimpleWriteReadWithSomeTargetDevicesTest (line 224) | class SimpleWriteReadWithSomeTargetDevicesTest
    method server (line 257) | void server(Pipe& pipe) override {
    method client (line 265) | void client(Pipe& pipe) override {
  function TEST (line 281) | TEST(Pipe, SimpleWriteReadWithSomeTargetDevices) {
  class MultipleWriteReadTest (line 286) | class MultipleWriteReadTest : public ClientServerPipeTestCase {
    method server (line 320) | void server(Pipe& pipe) override {
    method client (line 335) | void client(Pipe& pipe) override {
  function TEST (line 361) | TEST(Pipe, MultipleWriteRead) {
  class MultipleWriteReadWithSomeTargetDevicesTest (line 366) | class MultipleWriteReadWithSomeTargetDevicesTest
    method server (line 402) | void server(Pipe& pipe) override {
    method client (line 417) | void client(Pipe& pipe) override {
  function TEST (line 443) | TEST(Pipe, MultipleWriteReadWithSomeTargetDevices) {
  class WriteFromBothThenReadTest (line 448) | class WriteFromBothThenReadTest : public ClientServerPipeTestCase {
    method server (line 482) | void server(Pipe& pipe) override {
    method client (line 503) | void client(Pipe& pipe) override {
  function TEST (line 525) | TEST(Pipe, WriteFromBothThenRead) {

FILE: tensorpipe/test/core/pipe_test.h
  type Storage (line 26) | struct Storage {
  type InlineMessage (line 31) | struct InlineMessage {
  function else (line 140) | else if (targetDevice.type == tensorpipe::kCudaDeviceType) {
  function pipeWriteWithFuture (line 164) | void> pipeWriteWithFuture(
  function expectDescriptorAndStorageMatchMessage (line 225) | inline void expectDescriptorAndStorageMatchMessage(
  function std (line 282) | inline std::vector<std::string> genUrls() {
  function std (line 293) | inline std::shared_ptr<tensorpipe::Context> makeContext() {
  function class (line 322) | class ClientServerPipeTestCase {

FILE: tensorpipe/test/peer_group.h
  function class (line 22) | class PeerGroup {
  function class (line 68) | class ThreadPeerGroup : public PeerGroup {

FILE: tensorpipe/test/python/tensorpipe.py
  class TestTensorpipe (line 14) | class TestTensorpipe(unittest.TestCase):
    method test_read_write (line 15) | def test_read_write(self):

FILE: tensorpipe/test/test.cc
  type Initializer (line 14) | struct Initializer {
    method Initializer (line 15) | explicit Initializer() {

FILE: tensorpipe/test/test_environment.h
  function class (line 11) | class TestEnvironment {

FILE: tensorpipe/test/transport/connection_test.cc
  function TEST_P (line 19) | TEST_P(TransportTest, Connection_Initialization) {
  function TEST_P (line 43) | TEST_P(TransportTest, Connection_InitializationError) {
  function TEST_P (line 68) | TEST_P(TransportTest, DISABLED_Connection_DestroyConnectionFromCallback) {
  type MyNopType (line 95) | struct MyNopType {
  function TEST_P (line 102) | TEST_P(TransportTest, Connection_NopWrite) {
  function TEST_P (line 128) | TEST_P(TransportTest, Connection_QueueWritesBeforeReads) {
  function TEST_P (line 179) | TEST_P(TransportTest, DISABLED_Connection_EmptyBuffer) {

FILE: tensorpipe/test/transport/context_test.cc
  function TEST_P (line 16) | TEST_P(TransportTest, Context_Basics) {
  function TEST_P (line 50) | TEST_P(TransportTest, Context_DomainDescriptor) {

FILE: tensorpipe/test/transport/ibv/connection_test.cc
  class IbvTransportTest (line 20) | class IbvTransportTest : public TransportTest {}
  function TEST_P (line 29) | TEST_P(IbvTransportTest, Chunking) {
  function TEST_P (line 65) | TEST_P(IbvTransportTest, ChunkingImplicitRead) {
  function TEST_P (line 92) | TEST_P(IbvTransportTest, QueueWrites) {
  type MyNopType (line 138) | struct MyNopType {
  function TEST_P (line 145) | TEST_P(IbvTransportTest, NopWriteWrapAround) {

FILE: tensorpipe/test/transport/ibv/context_test.cc
  class IbvTransportContextTest (line 16) | class IbvTransportContextTest : public TransportTest {}
  function TEST_P (line 26) | TEST_P(IbvTransportContextTest, LookupHostnameAddress) {
  function TEST_P (line 44) | TEST_P(IbvTransportContextTest, LookupInterfaceAddress) {

FILE: tensorpipe/test/transport/ibv/ibv_test.h
  function class (line 14) | class IbvTransportTestHelper : public TransportTestHelper {

FILE: tensorpipe/test/transport/ibv/sockaddr_test.cc
  function family (line 19) | int family(const ibv::Sockaddr& addr) {
  function port (line 24) | int port(const ibv::Sockaddr& addr) {
  function TEST (line 39) | TEST(IbvSockaddr, InetBadPort) {
  function TEST (line 47) | TEST(IbvSockaddr, Inet) {
  function TEST (line 70) | TEST(IbvSockaddr, Inet6BadPort) {
  function TEST (line 87) | TEST(IbvSockaddr, Inet6) {

FILE: tensorpipe/test/transport/listener_test.cc
  function TEST_P (line 16) | TEST_P(TransportTest, Listener_Basics) {
  function TEST_P (line 50) | TEST_P(TransportTest, Listener_AcceptCallbacksAreQueued) {
  function TEST_P (line 86) | TEST_P(TransportTest, Listener_IncomingConnectionsAreQueued) {
  function TEST_P (line 122) | TEST_P(TransportTest, Listener_CreateThenCloseAndThenGetAddress) {
  function TEST_P (line 166) | TEST_P(TransportTest, Listener_CreateAfterClosingContextAndThenGetAddres...

FILE: tensorpipe/test/transport/shm/connection_test.cc
  class ShmTransportTest (line 20) | class ShmTransportTest : public TransportTest {}
  function TEST_P (line 29) | TEST_P(ShmTransportTest, Chunking) {
  function TEST_P (line 65) | TEST_P(ShmTransportTest, ChunkingImplicitRead) {
  function TEST_P (line 92) | TEST_P(ShmTransportTest, QueueWrites) {
  type MyNopType (line 138) | struct MyNopType {
  function TEST_P (line 145) | TEST_P(ShmTransportTest, NopWriteWrapAround) {

FILE: tensorpipe/test/transport/shm/listener_test.cc
  class ShmListenerTest (line 24) | class ShmListenerTest : public TransportTest {}
  function generateUniqueAddr (line 28) | std::string generateUniqueAddr() {
  function TEST_P (line 39) | TEST_P(ShmListenerTest, ExplicitAbstractSocketName) {
  function TEST_P (line 56) | TEST_P(ShmListenerTest, AutobindAbstractSocketName) {

FILE: tensorpipe/test/transport/shm/reactor_test.cc
  function run (line 24) | void run(std::function<void(int)> fn1, std::function<void(int)> fn2) {
  function TEST (line 53) | TEST(ShmReactor, Basic) {
  function TEST (line 98) | TEST(ShmReactor, TokenReuse) {

FILE: tensorpipe/test/transport/shm/shm_test.h
  function class (line 16) | class SHMTransportTestHelper : public TransportTestHelper {

FILE: tensorpipe/test/transport/shm/sockaddr_test.cc
  function TEST (line 15) | TEST(ShmSockaddr, FromToString) {

FILE: tensorpipe/test/transport/transport_test.h
  function class (line 21) | class TransportTestHelper {
  function class (line 45) | class TransportTest : public ::testing::TestWithParam<TransportTestHelpe...
  function class (line 90) | class Bomb {
  function tensorpipe (line 168) | const tensorpipe::Error& error) {

FILE: tensorpipe/test/transport/uv/connection_test.cc
  class UVTransportConnectionTest (line 15) | class UVTransportConnectionTest : public TransportTest {}
  function TEST_P (line 24) | TEST_P(UVTransportConnectionTest, LargeWrite) {

FILE: tensorpipe/test/transport/uv/context_test.cc
  class UVTransportContextTest (line 16) | class UVTransportContextTest : public TransportTest {}
  function TEST_P (line 26) | TEST_P(UVTransportContextTest, LookupHostnameAddress) {
  function TEST_P (line 44) | TEST_P(UVTransportContextTest, LookupInterfaceAddress) {
  function TEST_P (line 53) | TEST_P(UVTransportContextTest, LookupAddressLikeNccl) {

FILE: tensorpipe/test/transport/uv/loop_test.cc
  type test (line 15) | namespace test {
    type transport (line 16) | namespace transport {
      type uv (line 17) | namespace uv {
        function TEST (line 19) | TEST(UvLoop, Defer) {

FILE: tensorpipe/test/transport/uv/sockaddr_test.cc
  function family (line 19) | int family(const uv::Sockaddr& addr) {
  function port (line 24) | int port(const uv::Sockaddr& addr) {
  function TEST (line 39) | TEST(UvSockaddr, InetBadPort) {
  function TEST (line 46) | TEST(UvSockaddr, Inet) {
  function TEST (line 69) | TEST(UvSockaddr, Inet6BadPort) {
  function TEST (line 86) | TEST(UvSockaddr, Inet6) {

FILE: tensorpipe/test/transport/uv/uv_test.h
  function class (line 14) | class UVTransportTestHelper : public TransportTestHelper {

FILE: tensorpipe/transport/connection.h
  function namespace (line 18) | namespace tensorpipe {

FILE: tensorpipe/transport/connection_boilerplate.h
  function namespace (line 20) | namespace tensorpipe {

FILE: tensorpipe/transport/connection_impl_boilerplate.h
  function namespace (line 22) | namespace tensorpipe {
  function TList (line 169) | TList, TConn>::read(read_callback_fn fn) {
  function TList (line 298) | TList, TConn>::write(
  function TList (line 432) | TList, TConn>::closeFromLoop() {

FILE: tensorpipe/transport/context.h
  function namespace (line 14) | namespace tensorpipe {

FILE: tensorpipe/transport/context_boilerplate.h
  function namespace (line 20) | namespace tensorpipe {
  function TList (line 106) | TList, TConn>::setId(std::string id) {

FILE: tensorpipe/transport/context_impl_boilerplate.h
  function namespace (line 22) | namespace tensorpipe {
  function TList (line 190) | TList, TConn>::unenroll(TConn& connection) {
  function TList (line 209) | TList, TConn>::close() {
  function TList (line 222) | TList, TConn>::setError(Error error) {

FILE: tensorpipe/transport/error.cc
  type tensorpipe (line 11) | namespace tensorpipe {
    type transport (line 12) | namespace transport {

FILE: tensorpipe/transport/error.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/transport/ibv/connection_impl.cc
  type tensorpipe (line 32) | namespace tensorpipe {
    type transport (line 33) | namespace transport {
      type ibv (line 34) | namespace ibv {
        type Exchange (line 41) | struct Exchange {
        type Exchange (line 267) | struct Exchange

FILE: tensorpipe/transport/ibv/connection_impl.h
  type State (line 52) | enum State {
  function State (line 110) | State state_{INITIALIZING};

FILE: tensorpipe/transport/ibv/context_impl.cc
  type tensorpipe (line 14) | namespace tensorpipe {
    type transport (line 15) | namespace transport {
      type ibv (line 16) | namespace ibv {
        function generateDomainDescriptor (line 24) | std::string generateDomainDescriptor() {
        function Reactor (line 102) | Reactor& ContextImpl::getReactor() {

FILE: tensorpipe/transport/ibv/context_impl.h
  function namespace (line 20) | namespace tensorpipe {

FILE: tensorpipe/transport/ibv/error.cc
  type tensorpipe (line 17) | namespace tensorpipe {
    type transport (line 18) | namespace transport {
      type ibv (line 19) | namespace ibv {

FILE: tensorpipe/transport/ibv/error.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/transport/ibv/factory.cc
  type tensorpipe (line 16) | namespace tensorpipe {
    type transport (line 17) | namespace transport {
      type ibv (line 18) | namespace ibv {
        function create (line 20) | std::shared_ptr<Context> create() {

FILE: tensorpipe/transport/ibv/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/transport/ibv/listener_impl.cc
  type tensorpipe (line 25) | namespace tensorpipe {
    type transport (line 26) | namespace transport {
      type ibv (line 27) | namespace ibv {
        type sockaddr_storage (line 71) | struct sockaddr_storage
        type sockaddr (line 78) | struct sockaddr

FILE: tensorpipe/transport/ibv/listener_impl.h
  function namespace (line 24) | namespace tensorpipe {

FILE: tensorpipe/transport/ibv/reactor.cc
  type tensorpipe (line 14) | namespace tensorpipe {
    type transport (line 15) | namespace transport {
      type ibv (line 16) | namespace ibv {

FILE: tensorpipe/transport/ibv/reactor.h
  function class (line 31) | class IbvEventHandler {
  function IbvLib (line 58) | const IbvLib& getIbvLib() {
  function IbvAddress (line 74) | const IbvAddress& getIbvAddress() {
  type WriteInfo (line 82) | struct WriteInfo {
  type AckInfo (line 92) | struct AckInfo {
  function numAvailableWrites_ (line 134) | uint32_t numAvailableWrites_{kNumPendingWriteReqs};

FILE: tensorpipe/transport/ibv/sockaddr.cc
  type tensorpipe (line 21) | namespace tensorpipe {
    type transport (line 22) | namespace transport {
      type ibv (line 23) | namespace ibv {
        function Sockaddr (line 25) | Sockaddr Sockaddr::createInetSockAddr(const std::string& str) {
        type sockaddr_in (line 115) | struct sockaddr_in
        type sockaddr_in6 (line 121) | struct sockaddr_in6

FILE: tensorpipe/transport/ibv/sockaddr.h
  function namespace (line 18) | namespace tensorpipe {

FILE: tensorpipe/transport/ibv/utility.cc
  type tensorpipe (line 32) | namespace tensorpipe {
    type transport (line 33) | namespace transport {
      type ibv (line 34) | namespace ibv {
        type InterfaceAddressesDeleter (line 38) | struct InterfaceAddressesDeleter {
          type ifaddrs (line 39) | struct ifaddrs
        type ifaddrs (line 45) | struct ifaddrs
        function createInterfaceAddresses (line 47) | std::tuple<Error, InterfaceAddresses> createInterfaceAddresses() {
        function getHostname (line 58) | std::tuple<Error, std::string> getHostname() {
        type AddressInfoDeleter (line 68) | struct AddressInfoDeleter {
          type addrinfo (line 69) | struct addrinfo
        type addrinfo (line 74) | struct addrinfo
        function createAddressInfo (line 76) | std::tuple<Error, AddressInfo> createAddressInfo(std::string host) {
        function lookupAddrForIface (line 94) | std::tuple<Error, std::string> lookupAddrForIface(std::string ifac...
        function lookupAddrForHostname (line 128) | std::tuple<Error, std::string> lookupAddrForHostname() {

FILE: tensorpipe/transport/ibv/utility.h
  function namespace (line 16) | namespace tensorpipe {

FILE: tensorpipe/transport/listener.h
  function namespace (line 18) | namespace tensorpipe {

FILE: tensorpipe/transport/listener_boilerplate.h
  function namespace (line 19) | namespace tensorpipe {

FILE: tensorpipe/transport/listener_impl_boilerplate.h
  function namespace (line 24) | namespace tensorpipe {
  function TList (line 150) | TList, TConn>::accept(
  function TList (line 238) | TList, TConn>::close() {
  function TList (line 251) | TList, TConn>::setError(Error error) {

FILE: tensorpipe/transport/shm/connection_impl.cc
  type tensorpipe (line 28) | namespace tensorpipe {
    type transport (line 29) | namespace transport {
      type shm (line 30) | namespace shm {

FILE: tensorpipe/transport/shm/connection_impl.h
  function namespace (line 26) | namespace tensorpipe {

FILE: tensorpipe/transport/shm/context_impl.cc
  type tensorpipe (line 17) | namespace tensorpipe {
    type transport (line 18) | namespace transport {
      type shm (line 19) | namespace shm {

FILE: tensorpipe/transport/shm/context_impl.h
  function namespace (line 19) | namespace tensorpipe {

FILE: tensorpipe/transport/shm/factory.cc
  type tensorpipe (line 16) | namespace tensorpipe {
    type transport (line 17) | namespace transport {
      type shm (line 18) | namespace shm {
        function create (line 20) | std::shared_ptr<Context> create() {

FILE: tensorpipe/transport/shm/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/transport/shm/listener_impl.cc
  type tensorpipe (line 25) | namespace tensorpipe {
    type transport (line 26) | namespace transport {
      type shm (line 27) | namespace shm {
        type sockaddr_storage (line 65) | struct sockaddr_storage
        type sockaddr (line 72) | struct sockaddr

FILE: tensorpipe/transport/shm/listener_impl.h
  function namespace (line 20) | namespace tensorpipe {

FILE: tensorpipe/transport/shm/reactor.cc
  type tensorpipe (line 14) | namespace tensorpipe {
    type transport (line 15) | namespace transport {
      type shm (line 16) | namespace shm {
        function writeToken (line 20) | void writeToken(Reactor::Producer& producer, Reactor::TToken token) {

FILE: tensorpipe/transport/shm/sockaddr.cc
  type tensorpipe (line 19) | namespace tensorpipe {
    type transport (line 20) | namespace transport {
      type shm (line 21) | namespace shm {
        function Sockaddr (line 23) | Sockaddr Sockaddr::createAbstractUnixAddr(const std::string& name) {
        type sockaddr (line 48) | struct sockaddr
        type sockaddr_un (line 61) | struct sockaddr_un
        type sockaddr_un (line 62) | struct sockaddr_un

FILE: tensorpipe/transport/shm/sockaddr.h
  function namespace (line 23) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/connection_impl.cc
  type tensorpipe (line 25) | namespace tensorpipe {
    type transport (line 26) | namespace transport {
      type uv (line 27) | namespace uv {

FILE: tensorpipe/transport/uv/connection_impl.h
  function namespace (line 22) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/context_impl.cc
  type tensorpipe (line 15) | namespace tensorpipe {
    type transport (line 16) | namespace transport {
      type uv (line 17) | namespace uv {
        function generateDomainDescriptor (line 25) | std::string generateDomainDescriptor() {

FILE: tensorpipe/transport/uv/context_impl.h
  function namespace (line 21) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/error.cc
  type tensorpipe (line 13) | namespace tensorpipe {
    type transport (line 14) | namespace transport {
      type uv (line 15) | namespace uv {

FILE: tensorpipe/transport/uv/error.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/factory.cc
  type tensorpipe (line 16) | namespace tensorpipe {
    type transport (line 17) | namespace transport {
      type uv (line 18) | namespace uv {
        function create (line 20) | std::shared_ptr<Context> create() {

FILE: tensorpipe/transport/uv/factory.h
  function namespace (line 15) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/listener_impl.cc
  type tensorpipe (line 20) | namespace tensorpipe {
    type transport (line 21) | namespace transport {
      type uv (line 22) | namespace uv {

FILE: tensorpipe/transport/uv/listener_impl.h
  function namespace (line 19) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/loop.cc
  type tensorpipe (line 14) | namespace tensorpipe {
    type transport (line 15) | namespace transport {
      type uv (line 16) | namespace uv {

FILE: tensorpipe/transport/uv/loop.h
  function namespace (line 23) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/sockaddr.cc
  type tensorpipe (line 21) | namespace tensorpipe {
    type transport (line 22) | namespace transport {
      type uv (line 23) | namespace uv {
        function Sockaddr (line 25) | Sockaddr Sockaddr::createInetSockAddr(const std::string& str) {
        type sockaddr_in (line 99) | struct sockaddr_in
        type sockaddr_in6 (line 105) | struct sockaddr_in6

FILE: tensorpipe/transport/uv/sockaddr.h
  function namespace (line 18) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/utility.cc
  type tensorpipe (line 16) | namespace tensorpipe {
    type transport (line 17) | namespace transport {
      type uv (line 18) | namespace uv {
        function lookupAddrForIface (line 20) | std::tuple<Error, std::string> lookupAddrForIface(std::string ifac...
        function lookupAddrForHostname (line 53) | std::tuple<Error, std::string> lookupAddrForHostname() {
        function lookupAddrLikeNccl (line 138) | std::tuple<Error, std::string> lookupAddrLikeNccl(

FILE: tensorpipe/transport/uv/utility.h
  function namespace (line 19) | namespace tensorpipe {

FILE: tensorpipe/transport/uv/uv.h
  function namespace (line 25) | namespace tensorpipe {
Condensed preview — 292 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,300K chars).
[
  {
    "path": ".circleci/Dockerfile.cuda10.1",
    "chars": 179,
    "preview": "FROM nvidia/cuda:10.1-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y buil"
  },
  {
    "path": ".circleci/Dockerfile.cuda10.2",
    "chars": 179,
    "preview": "FROM nvidia/cuda:10.2-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y buil"
  },
  {
    "path": ".circleci/Dockerfile.cuda11.0",
    "chars": 179,
    "preview": "FROM nvidia/cuda:11.0-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y buil"
  },
  {
    "path": ".circleci/Dockerfile.cuda11.1",
    "chars": 179,
    "preview": "FROM nvidia/cuda:11.1-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y buil"
  },
  {
    "path": ".circleci/Dockerfile.cuda9.2",
    "chars": 178,
    "preview": "FROM nvidia/cuda:9.2-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y build"
  },
  {
    "path": ".circleci/config.yml",
    "chars": 10624,
    "preview": "version: 2.1\n\njobs:\n  build:\n    parameters:\n      docker_image:\n        type: string\n        default: \"\"\n      apt_get:"
  },
  {
    "path": ".gitignore",
    "chars": 41,
    "preview": "*~\n.DS_Store\n/build/\n/cmake-build-debug/\n"
  },
  {
    "path": ".gitmodules",
    "chars": 445,
    "preview": "[submodule \"third_party/pybind11\"]\n\tpath = third_party/pybind11\n\turl = https://github.com/pybind/pybind11.git\n[submodule"
  },
  {
    "path": "CMakeLists.txt",
    "chars": 703,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "chars": 3349,
    "preview": "# Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as\ncontributors and"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 1700,
    "preview": "# Contributing to TensorPipe\n\nWe want to make contributing to this project as easy and transparent as\npossible.\n\n## Our "
  },
  {
    "path": "LICENSE.txt",
    "chars": 1535,
    "preview": "BSD License\n\nFor TensorPipe software\n\nCopyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.\n\nRedistrib"
  },
  {
    "path": "README.md",
    "chars": 9251,
    "preview": "# TensorPipe\n\nThe TensorPipe project provides a tensor-aware channel to transfer rich objects\nfrom one process to anothe"
  },
  {
    "path": "cmake/FindPackageHandleStandardArgs.cmake",
    "chars": 14944,
    "preview": "# Copyright 2000-2020 Kitware, Inc. and Contributors\n# All rights reserved.\n#\n# Distributed under the OSI-approved BSD 3"
  },
  {
    "path": "cmake/FindPackageMessage.cmake",
    "chars": 1606,
    "preview": "# Copyright 2000-2020 Kitware, Inc. and Contributors\n# All rights reserved.\n#\n# Distributed under the OSI-approved BSD 3"
  },
  {
    "path": "cmake/Finduv.cmake",
    "chars": 2507,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "cmake/MiscCheck.cmake",
    "chars": 525,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "cmake/Options.cmake",
    "chars": 2113,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "cmake/Sanitize.cmake",
    "chars": 400,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "docs/cuda_gotchas.md",
    "chars": 11555,
    "preview": "# CUDA gotchas\n\nWhile implementing CUDA channels we hit some undocumented \"quirks\" which forced us to adapt our original"
  },
  {
    "path": "docs/development.md",
    "chars": 1794,
    "preview": "# Development\n\nTensorPipe uses CMake for its build system.\n\n## Dependencies\n\nTo build TensorPipe, you need:\n\n* C++14 com"
  },
  {
    "path": "docs/linux_support.md",
    "chars": 7842,
    "preview": "This document is intended for developers and advanced users. It’s the kind of document that risks going out of date very"
  },
  {
    "path": "docs/shm.md",
    "chars": 6205,
    "preview": "# The shm transport\n\nThis document is an attempt to capture the design principles and inner\nworking of the shm transport"
  },
  {
    "path": "docs/thread_model.md",
    "chars": 11911,
    "preview": "# TensorPipe's thread model\n\nTensorPipe is spawning multiple threads internally. This is a design\nrequirement as, for ex"
  },
  {
    "path": "setup.py",
    "chars": 1775,
    "preview": "#!/usr/bin/env python3\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code "
  },
  {
    "path": "tensorpipe/.clang-format",
    "chars": 2569,
    "preview": "---\nAccessModifierOffset: -1\nAlignAfterOpenBracket: AlwaysBreak\nAlignConsecutiveAssignments: false\nAlignConsecutiveDecla"
  },
  {
    "path": "tensorpipe/.clang-tidy",
    "chars": 1937,
    "preview": "---\nInheritParentConfig: true\nChecks: '\nreadability-identifier-naming,\nreadability-inconsistent-declaration-parameter-na"
  },
  {
    "path": "tensorpipe/CMakeLists.txt",
    "chars": 8625,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "tensorpipe/benchmark/CMakeLists.txt",
    "chars": 584,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "tensorpipe/benchmark/benchmark_pipe.cc",
    "chars": 27116,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/benchmark_transport.cc",
    "chars": 6299,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/channel_registry.cc",
    "chars": 2773,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/channel_registry.h",
    "chars": 503,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/measurements.h",
    "chars": 1201,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/options.cc",
    "chars": 5626,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/options.h",
    "chars": 968,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/registry.h",
    "chars": 5497,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/transport_registry.cc",
    "chars": 1542,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/benchmark/transport_registry.h",
    "chars": 513,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/basic/channel_impl.cc",
    "chars": 5591,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/basic/channel_impl.h",
    "chars": 2823,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/basic/context_impl.cc",
    "chars": 1429,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/basic/context_impl.h",
    "chars": 1257,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/basic/factory.cc",
    "chars": 669,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/basic/factory.h",
    "chars": 463,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/channel.h",
    "chars": 2915,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/channel_boilerplate.h",
    "chars": 3838,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/channel_impl_boilerplate.h",
    "chars": 8799,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cma/channel_impl.cc",
    "chars": 9088,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cma/channel_impl.h",
    "chars": 3196,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cma/context_impl.cc",
    "chars": 14091,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cma/context_impl.h",
    "chars": 2061,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cma/factory.cc",
    "chars": 659,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cma/factory.h",
    "chars": 459,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/context.h",
    "chars": 3701,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/context_boilerplate.h",
    "chars": 4243,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/context_impl_boilerplate.h",
    "chars": 8635,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/channel_impl.cc",
    "chars": 23635,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/channel_impl.h",
    "chars": 5020,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/constants.h",
    "chars": 832,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/context_impl.cc",
    "chars": 5656,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/context_impl.h",
    "chars": 2287,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/factory.cc",
    "chars": 757,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/factory.h",
    "chars": 508,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/channel_impl.cc",
    "chars": 22296,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/channel_impl.h",
    "chars": 8810,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/constants.h",
    "chars": 1461,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/context_impl.cc",
    "chars": 23656,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/context_impl.h",
    "chars": 4973,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/error.h",
    "chars": 639,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/factory.cc",
    "chars": 772,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/factory.h",
    "chars": 592,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/channel_impl.cc",
    "chars": 16901,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/channel_impl.h",
    "chars": 5799,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/constants.h",
    "chars": 831,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/context_impl.cc",
    "chars": 15041,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/context_impl.h",
    "chars": 5238,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/factory.cc",
    "chars": 684,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/factory.h",
    "chars": 469,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/channel_impl.cc",
    "chars": 10311,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/channel_impl.h",
    "chars": 3760,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/context_impl.cc",
    "chars": 3181,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/context_impl.h",
    "chars": 1402,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/factory.cc",
    "chars": 684,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/factory.h",
    "chars": 469,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/error.cc",
    "chars": 630,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/error.h",
    "chars": 778,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/helpers.cc",
    "chars": 1190,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/helpers.h",
    "chars": 565,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/mpt/channel_impl.cc",
    "chars": 12329,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/mpt/channel_impl.h",
    "chars": 3734,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/mpt/context_impl.cc",
    "chars": 8111,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/mpt/context_impl.h",
    "chars": 2993,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/mpt/factory.cc",
    "chars": 834,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/mpt/factory.h",
    "chars": 646,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/mpt/nop_types.h",
    "chars": 1128,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/xth/channel_impl.cc",
    "chars": 8909,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/xth/channel_impl.h",
    "chars": 3177,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/xth/context_impl.cc",
    "chars": 3614,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/xth/context_impl.h",
    "chars": 1984,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/xth/factory.cc",
    "chars": 659,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/channel/xth/factory.h",
    "chars": 459,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/address.cc",
    "chars": 678,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/address.h",
    "chars": 360,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/allocator.cc",
    "chars": 1918,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/allocator.h",
    "chars": 1358,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/buffer.h",
    "chars": 3472,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/busy_polling_loop.h",
    "chars": 1256,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/callback.h",
    "chars": 4701,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/cpu_buffer.h",
    "chars": 441,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/cuda.h",
    "chars": 8945,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/cuda_buffer.cc",
    "chars": 779,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/cuda_buffer.h",
    "chars": 468,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/cuda_lib.h",
    "chars": 6301,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/cuda_loop.cc",
    "chars": 2782,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/cuda_loop.h",
    "chars": 1256,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/deferred_executor.h",
    "chars": 10377,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/defs.h",
    "chars": 10631,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/device.h",
    "chars": 1649,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/dl.h",
    "chars": 2793,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/epoll_loop.cc",
    "chars": 6128,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/epoll_loop.h",
    "chars": 5513,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/error.cc",
    "chars": 1264,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/error.h",
    "chars": 3128,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/error_macros.h",
    "chars": 502,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/fd.cc",
    "chars": 1525,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/fd.h",
    "chars": 2529,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/ibv.cc",
    "chars": 5502,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/ibv.h",
    "chars": 6858,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/ibv_lib.h",
    "chars": 17891,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/memory.h",
    "chars": 1513,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/nop.h",
    "chars": 9312,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/nvml_lib.h",
    "chars": 6898,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/optional.h",
    "chars": 128,
    "preview": "#pragma once\n\n#include <optional>\n\nnamespace tensorpipe {\n\nusing std::optional;\nusing std::nullopt;\n\n} // namespace tens"
  },
  {
    "path": "tensorpipe/common/queue.h",
    "chars": 988,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/ringbuffer.h",
    "chars": 6533,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/ringbuffer_read_write_ops.h",
    "chars": 9266,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/ringbuffer_role.h",
    "chars": 7858,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/shm_ringbuffer.h",
    "chars": 2783,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/shm_segment.cc",
    "chars": 5842,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/shm_segment.h",
    "chars": 6180,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/socket.cc",
    "chars": 3372,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/socket.h",
    "chars": 6508,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/state_machine.h",
    "chars": 4405,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/stream_read_write_ops.h",
    "chars": 5688,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/strings.h",
    "chars": 1705,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/system.cc",
    "chars": 9699,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/common/system.h",
    "chars": 3366,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/config.h.in",
    "chars": 366,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/config_cuda.h.in",
    "chars": 329,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/context.cc",
    "chars": 1353,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/context.h",
    "chars": 2468,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/context_impl.cc",
    "chars": 8938,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/context_impl.h",
    "chars": 4835,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/error.cc",
    "chars": 676,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/error.h",
    "chars": 961,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/listener.cc",
    "chars": 1223,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/listener.h",
    "chars": 2928,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/listener_impl.cc",
    "chars": 11316,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/listener_impl.h",
    "chars": 4224,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/message.h",
    "chars": 2885,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/nop_types.h",
    "chars": 2259,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/pipe.cc",
    "chars": 1324,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/pipe.h",
    "chars": 2972,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/pipe_impl.cc",
    "chars": 49098,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/core/pipe_impl.h",
    "chars": 9638,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/misc/CMakeLists.txt",
    "chars": 469,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "tensorpipe/misc/dump_state_machine.cc",
    "chars": 4590,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/python/CMakeLists.txt",
    "chars": 533,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "tensorpipe/python/tensorpipe.cc",
    "chars": 15162,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/tensorpipe.h",
    "chars": 1449,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/tensorpipe_cuda.h",
    "chars": 704,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/CMakeLists.txt",
    "chars": 3244,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "tensorpipe/test/channel/basic/basic_test.cc",
    "chars": 838,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/channel_test.cc",
    "chars": 11618,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/channel_test.h",
    "chars": 8903,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cpu.cc",
    "chars": 3699,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cpu.h",
    "chars": 1328,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cuda.cc",
    "chars": 4390,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cuda.h",
    "chars": 2618,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cuda_multi_gpu.cc",
    "chars": 10169,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cuda_xdtt.cc",
    "chars": 5935,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/cma/CMakeLists.txt",
    "chars": 341,
    "preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
  },
  {
    "path": "tensorpipe/test/channel/cma/cma_test.cc",
    "chars": 826,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/cma/docker_tests.sh",
    "chars": 14120,
    "preview": "#!/usr/bin/env bash\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is "
  },
  {
    "path": "tensorpipe/test/channel/cma/probe.cc",
    "chars": 4791,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/cma/probe_report_checker.py",
    "chars": 1774,
    "preview": "#!/usr/bin/env python3\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code "
  },
  {
    "path": "tensorpipe/test/channel/cuda_basic/cuda_basic_test.cc",
    "chars": 2385,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/cuda_gdr/cuda_gdr_test.cc",
    "chars": 1113,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/cuda_helpers.h",
    "chars": 3091,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/cuda_ipc/cuda_ipc_test.cc",
    "chars": 2022,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/cuda_xth/cuda_xth_test.cc",
    "chars": 1118,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/kernel.cu",
    "chars": 614,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/kernel.cuh",
    "chars": 448,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/mpt/mpt_test.cc",
    "chars": 3820,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/channel/xth/xth_test.cc",
    "chars": 826,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/common/cuda_test.cc",
    "chars": 2793,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/common/defs_test.cc",
    "chars": 719,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/common/epoll_loop_test.cc",
    "chars": 6272,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  },
  {
    "path": "tensorpipe/test/common/ringbuffer_test.cc",
    "chars": 18522,
    "preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
  }
]

// ... and 92 more files (download for full content)

About this extraction

This page contains the full source code of the pytorch/tensorpipe GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 292 files (1.2 MB, approximately 314.9k tokens) and a symbol index of 883 extracted functions, classes, methods, constants, and types. Use it with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — a free GitHub-repository-to-text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!