[
  {
    "path": ".circleci/Dockerfile.cuda10.1",
    "content": "FROM nvidia/cuda:10.1-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y build-essential cmake\n\nCOPY . /tensorpipe\n\nWORKDIR /tensorpipe\n"
  },
  {
    "path": ".circleci/Dockerfile.cuda10.2",
    "content": "FROM nvidia/cuda:10.2-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y build-essential cmake\n\nCOPY . /tensorpipe\n\nWORKDIR /tensorpipe\n"
  },
  {
    "path": ".circleci/Dockerfile.cuda11.0",
    "content": "FROM nvidia/cuda:11.0-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y build-essential cmake\n\nCOPY . /tensorpipe\n\nWORKDIR /tensorpipe\n"
  },
  {
    "path": ".circleci/Dockerfile.cuda11.1",
    "content": "FROM nvidia/cuda:11.1-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y build-essential cmake\n\nCOPY . /tensorpipe\n\nWORKDIR /tensorpipe\n"
  },
  {
    "path": ".circleci/Dockerfile.cuda9.2",
    "content": "FROM nvidia/cuda:9.2-devel-ubuntu18.04\n\n# Install APT packages.\nRUN apt-get update && \\\n        apt-get install -y build-essential cmake\n\nCOPY . /tensorpipe\n\nWORKDIR /tensorpipe\n"
  },
  {
    "path": ".circleci/config.yml",
    "content": "version: 2.1\n\njobs:\n  build:\n    parameters:\n      docker_image:\n        type: string\n        default: \"\"\n      apt_get:\n        type: string\n        default: \"\"\n      c_compiler:\n        type: string\n        default: \"\"\n      cxx_compiler:\n        type: string\n        default: \"\"\n      cmake_args:\n        type: string\n        default: \"\"\n      nproc:\n        type: integer\n        default: 20\n    docker:\n      - image: << parameters.docker_image >>\n    steps:\n      - checkout\n      - run:\n          name: Install apt packages\n          command: |\n            apt-get update\n            apt-get install -y git-core build-essential cmake << parameters.apt_get >>\n      - run:\n          name: Initialize submodules\n          command: |\n            git submodule init\n            git submodule update\n      - run:\n          name: Build\n          command: |\n            mkdir build\n            cd build\n            cmake ../ \\\n              -DCMAKE_C_FLAGS=\"-Werror -Wno-deprecated-declarations\" \\\n              -DCMAKE_CXX_FLAGS=\"-Werror -Wno-deprecated-declarations\" \\\n              -DCMAKE_C_COMPILER=<< parameters.c_compiler >> \\\n              -DCMAKE_CXX_COMPILER=<< parameters.cxx_compiler >> \\\n              -DTP_ENABLE_CMA=OFF \\\n              -DTP_ENABLE_CUDA_IPC=OFF \\\n              -DTP_ENABLE_IBV=OFF \\\n              -DTP_BUILD_TESTING=ON \\\n              << parameters.cmake_args >>\n            make -j<<parameters.nproc>>\n      - run:\n          name: Test\n          command: |\n            cd build\n            ./tensorpipe/test/tensorpipe_test\n      - run:\n          name: Install\n          command: |\n            cd build\n            make install\n  build_gpu:\n    parameters:\n      cuda_version:\n        type: string\n      exclude_tests:\n        type: string\n        default: \"\"\n    machine:\n      resource_class: gpu.nvidia.small.multi\n      image: 
ubuntu-1604-cuda-10.1:201909-23\n      docker_layer_caching: true\n    steps:\n      - checkout\n      - run:\n          name: Initialize submodules\n          command: |\n            git submodule init\n            git submodule update\n      - run:\n          name: Build/test\n          command: |\n              docker build -t tensorpipe -f .circleci/Dockerfile.cuda<< parameters.cuda_version >> .\n              docker run --gpus all --pid=host tensorpipe sh -c \"\n                mkdir build && cd build &&\n                cmake ../ \\\n                  -DCMAKE_C_FLAGS=\\\"-Werror -Wno-deprecated-declarations\\\" \\\n                  -DCMAKE_CXX_FLAGS=\\\"-Werror -Wno-deprecated-declarations\\\" \\\n                  -DCUDA_NVCC_FLAGS=\\\"-gencode arch=compute_61,code=sm_61\\\" \\\n                  -DTP_ENABLE_SHM=OFF \\\n                  -DTP_ENABLE_CMA=OFF \\\n                  -DTP_USE_CUDA=ON \\\n                  -DTP_ENABLE_CUDA_IPC=ON \\\n                  -DTP_ENABLE_IBV=OFF \\\n                  -DTP_BUILD_TESTING=ON &&\n                make -j20 &&\n                ./tensorpipe/test/tensorpipe_test --gtest_filter='-<< parameters.exclude_tests >>' &&\n                make install\"\n  bare_metal:\n    parameters:\n      image:\n        type: string\n        default: \"\"\n      apt_get:\n        type: string\n        default: \"\"\n      c_compiler:\n        type: string\n        default: \"\"\n      cxx_compiler:\n        type: string\n        default: \"\"\n      cmake_args:\n        type: string\n        default: \"\"\n      nproc:\n        type: integer\n        default: 20\n    machine:\n      image: << parameters.image >>\n    steps:\n      - checkout\n      - run:\n          name: Install apt packages\n          command: |\n            sudo apt-get update\n            sudo apt-get install -y git-core build-essential cmake libibverbs1 rdma-core linux-modules-extra-$(uname -r) << parameters.apt_get >>\n      - run:\n          name: Initialize 
submodules\n          command: |\n            git submodule init\n            git submodule update\n      - run:\n          name: Build\n          command: |\n            mkdir build\n            cd build\n            cmake ../ \\\n              -DCMAKE_C_FLAGS=\"-Werror -Wno-deprecated-declarations\" \\\n              -DCMAKE_CXX_FLAGS=\"-Werror -Wno-deprecated-declarations\" \\\n              -DCMAKE_C_COMPILER=<< parameters.c_compiler >> \\\n              -DCMAKE_CXX_COMPILER=<< parameters.cxx_compiler >> \\\n              -DTP_ENABLE_CUDA_IPC=OFF \\\n              -DTP_ENABLE_IBV=ON \\\n              -DTP_BUILD_TESTING=ON \\\n              << parameters.cmake_args >>\n            make -j<<parameters.nproc>>\n      - run:\n          name: Configure Soft-RoCE (RXE) InfiniBand interface\n          command: |\n            # Find the name of the first non-loopback IP interface\n            INTERFACE_NAME=$(ip link | grep '^2: ' | sed -re 's/2: ([a-z0-9]+): .*/\\1/')\n            sudo rdma link add rxe0 type rxe netdev $INTERFACE_NAME\n      - run:\n          name: Test\n          command: |\n            cd build\n            ./tensorpipe/test/tensorpipe_test\n      - run:\n          name: Test CMA channel autodetection with Docker\n          command: |\n            bash -eo pipefail tensorpipe/test/channel/cma/docker_tests.sh\n      - run:\n          name: Install\n          command: |\n            cd build\n            sudo make install\n  build_osx:\n    macos:\n      xcode: 12.4.0\n    steps:\n      - checkout\n      - run:\n          name: Install homebrew packages\n          command: |\n            brew install cmake\n      - run:\n          name: Initialize submodules\n          command: |\n            git submodule init\n            git submodule update\n      - run:\n          name: Build\n          command: |\n            mkdir build\n            cd build\n            cmake ../ \\\n              -DCMAKE_C_FLAGS=\"-Werror -Wno-deprecated-declarations\" \\\n  
            -DCMAKE_CXX_FLAGS=\"-Werror -Wno-deprecated-declarations\" \\\n              -DTP_BUILD_TESTING=ON\n            make -j\n      - run:\n          name: Test\n          command: |\n            cd build\n            ./tensorpipe/test/tensorpipe_test\n      - run:\n          name: Install\n          command: |\n            cd build\n            make install\n  python:\n    parameters:\n      docker_image:\n        type: string\n        default: \"\"\n      apt_get:\n        type: string\n        default: \"\"\n    docker:\n      - image: << parameters.docker_image >>\n    steps:\n      - checkout\n      - run:\n          name: Install apt packages\n          command: |\n            apt-get update\n            apt-get install -y git-core build-essential cmake python3-dev python3-venv << parameters.apt_get >>\n      - run:\n          name: Initialize submodules\n          command: |\n            git submodule init\n            git submodule update\n      - run:\n          name: Build\n          command: |\n            python3 -m venv venv\n            source venv/bin/activate\n            TP_ENABLE_CMA=OFF TP_ENABLE_CUDA_IPC=OFF TP_ENABLE_IBV=OFF python3 setup.py install\n      - run:\n          name: Test\n          command: |\n            source venv/bin/activate\n            python3 tensorpipe/test/python/tensorpipe.py\n  format:\n    docker:\n      - image: ubuntu:18.04\n    steps:\n      - checkout\n      - run:\n          name: Install clang-format\n          command: |\n            apt-get update\n            apt-get install -y git-core clang-format-10\n      - run:\n          name: Verify clang-format\n          command: |\n             git ls-files | grep -E  '\\.(cc|h)$' | xargs clang-format-10 -i\n             if git diff --quiet; then\n               echo \"Formatting OK!\"\n             else\n               echo \"Formatting not OK!\"\n               echo \"------------------\"\n               git --no-pager diff --color\n               exit 1\n  
           fi\n\nworkflows:\n  build:\n    jobs:\n      - build:\n          name: gcc5\n          docker_image: ubuntu:18.04\n          apt_get: \"gcc-5 g++-5\"\n          c_compiler: gcc-5\n          cxx_compiler: g++-5\n      - build:\n          name: gcc7\n          docker_image: ubuntu:18.04\n          apt_get: \"gcc-7 g++-7\"\n          c_compiler: gcc-7\n          cxx_compiler: g++-7\n      - build:\n          name: clang6\n          docker_image: ubuntu:18.04\n          apt_get: \"clang-6.0\"\n          c_compiler: clang-6.0\n          cxx_compiler: clang++-6.0\n      - build:\n          name: gcc7-asan\n          docker_image: ubuntu:18.04\n          apt_get: \"gcc-7 g++-7\"\n          c_compiler: gcc-7\n          cxx_compiler: g++-7\n          cmake_args: -DSANITIZE=address\n      - build:\n          name: gcc7-tsan\n          docker_image: ubuntu:18.04\n          apt_get: \"gcc-7 g++-7\"\n          c_compiler: gcc-7\n          cxx_compiler: g++-7\n          cmake_args: -DSANITIZE=thread\n      - bare_metal:\n          name: bare-metal\n          image: ubuntu-2004:202008-01\n          apt_get: \"gcc-7 g++-7\"\n          c_compiler: gcc-7\n          cxx_compiler: g++-7\n      - build_gpu:\n          name: GPU (CUDA 9.2)\n          cuda_version: \"9.2\"\n          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on\n          # multi GPU for lack of p2p capabilities.\n          exclude_tests: \"CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*\"\n      - build_gpu:\n          name: GPU (CUDA 10.1)\n          cuda_version: \"10.1\"\n          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on\n          # multi GPU for lack of p2p capabilities.\n          exclude_tests: \"CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*\"\n      - build_gpu:\n          name: GPU (CUDA 10.2)\n          cuda_version: \"10.2\"\n          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on\n          # multi GPU for lack of p2p 
capabilities.\n          exclude_tests: \"CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*\"\n      - build_gpu:\n          name: GPU (CUDA 11.0)\n          cuda_version: \"11.0\"\n          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on\n          # multi GPU for lack of p2p capabilities.\n          exclude_tests: \"CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*\"\n      - build_gpu:\n          name: GPU (CUDA 11.1)\n          cuda_version: \"11.1\"\n          # Excluding CudaGdr for lack of InfiniBand hardware, and CudaIpc on\n          # multi GPU for lack of p2p capabilities, and CudaBasic/CudaMultiGPUChannelTestSuite.SendAcrossNonDefaultDevices/0\n          # because it does not work with CUDA 11.1 (cf. https://github.com/pytorch/tensorpipe/issues/368).\n          exclude_tests: \"CudaGdr*:CudaIpc/CudaMultiGPUChannelTestSuite*:CudaBasic/CudaMultiGPUChannelTestSuite.SendAcrossNonDefaultDevices/0\"\n      - build_osx:\n          name: OSX\n      - python:\n          name: python\n          docker_image: ubuntu:18.04\n          apt_get: \"clang-6.0\"\n      - format:\n          name: clang-format\n"
  },
  {
    "path": ".gitignore",
    "content": "*~\n.DS_Store\n/build/\n/cmake-build-debug/\n"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"third_party/pybind11\"]\n\tpath = third_party/pybind11\n\turl = https://github.com/pybind/pybind11.git\n[submodule \"third_party/libuv\"]\n\tpath = third_party/libuv\n\turl = https://github.com/libuv/libuv.git\n\tbranch = v1.x\n[submodule \"third_party/googletest\"]\n\tpath = third_party/googletest\n\turl = https://github.com/google/googletest.git\n[submodule \"third_party/libnop\"]\n\tpath = third_party/libnop\n\turl = https://github.com/google/libnop.git\n"
  },
  {
    "path": "CMakeLists.txt",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\ncmake_minimum_required(VERSION 3.18 FATAL_ERROR)\n\nproject(tensorpipe LANGUAGES C CXX)\n\nset(CMAKE_CXX_STANDARD 17)\n\nlist(APPEND CMAKE_MODULE_PATH \"${PROJECT_SOURCE_DIR}/cmake\")\n\n# Expose build options.\ninclude(Options)\n\n# Define sanitizer option, if specified.\ninclude(Sanitize)\n\n# Misc checks to cope with various compiler modes.\ninclude(MiscCheck)\n\nadd_subdirectory(tensorpipe)\n\ninstall(EXPORT TensorpipeTargets\n        DESTINATION share/cmake/Tensorpipe\n        FILE TensorpipeTargets.cmake)\n"
  },
  {
    "path": "CODE_OF_CONDUCT.md",
    "content": "# Code of Conduct\n\n## Our Pledge\n\nIn the interest of fostering an open and welcoming environment, we as\ncontributors and maintainers pledge to make participation in our project and\nour community a harassment-free experience for everyone, regardless of age, body\nsize, disability, ethnicity, sex characteristics, gender identity and expression,\nlevel of experience, education, socio-economic status, nationality, personal\nappearance, race, religion, or sexual identity and orientation.\n\n## Our Standards\n\nExamples of behavior that contributes to creating a positive environment\ninclude:\n\n* Using welcoming and inclusive language\n* Being respectful of differing viewpoints and experiences\n* Gracefully accepting constructive criticism\n* Focusing on what is best for the community\n* Showing empathy towards other community members\n\nExamples of unacceptable behavior by participants include:\n\n* The use of sexualized language or imagery and unwelcome sexual attention or\n  advances\n* Trolling, insulting/derogatory comments, and personal or political attacks\n* Public or private harassment\n* Publishing others' private information, such as a physical or electronic\n  address, without explicit permission\n* Other conduct which could reasonably be considered inappropriate in a\n  professional setting\n\n## Our Responsibilities\n\nProject maintainers are responsible for clarifying the standards of acceptable\nbehavior and are expected to take appropriate and fair corrective action in\nresponse to any instances of unacceptable behavior.\n\nProject maintainers have the right and responsibility to remove, edit, or\nreject comments, commits, code, wiki edits, issues, and other contributions\nthat are not aligned to this Code of Conduct, or to ban temporarily or\npermanently any contributor for other behaviors that they deem inappropriate,\nthreatening, offensive, or harmful.\n\n## Scope\n\nThis Code of Conduct applies within all project spaces, and 
it also applies when\nan individual is representing the project or its community in public spaces.\nExamples of representing a project or community include using an official\nproject e-mail address, posting via an official social media account, or acting\nas an appointed representative at an online or offline event. Representation of\na project may be further defined and clarified by project maintainers.\n\n## Enforcement\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be\nreported by contacting the project team at <conduct@pytorch.org>. All\ncomplaints will be reviewed and investigated and will result in a response that\nis deemed necessary and appropriate to the circumstances. The project team is\nobligated to maintain confidentiality with regard to the reporter of an incident.\nFurther details of specific enforcement policies may be posted separately.\n\nProject maintainers who do not follow or enforce the Code of Conduct in good\nfaith may face temporary or permanent repercussions as determined by other\nmembers of the project's leadership.\n\n## Attribution\n\nThis Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,\navailable at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html\n\n[homepage]: https://www.contributor-covenant.org\n\nFor answers to common questions about this code of conduct, see\nhttps://www.contributor-covenant.org/faq\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "content": "# Contributing to TensorPipe\n\nWe want to make contributing to this project as easy and transparent as\npossible.\n\n## Our Development Process\n\nThis project's source-of-truth is the version in Facebook's internal codebase,\nwhich is continuously synced with the GitHub mirror using\n[ShipIt](https://github.com/facebook/fbshipit). Pull requests on GitHub are\ncopied over using ImportIt (a companion tool for ShipIt).\n\n## Pull Requests\n\nWe actively welcome your pull requests.\n\n1. Fork the repo and create your branch from `main`.\n2. If you've added code that should be tested, add tests.\n3. If you've changed APIs, update the documentation.\n4. Ensure the test suite passes.\n5. Make sure your code lints.\n6. If you haven't already, complete the Contributor License Agreement (\"CLA\").\n\n## Contributor License Agreement (\"CLA\")\n\nIn order to accept your pull request, we need you to submit a CLA. You only\nneed to do this once to work on any of Facebook's open source projects.\n\nComplete your CLA here: <https://code.facebook.com/cla>\n\n## Issues\n\nWe use GitHub issues to track public bugs. Please ensure your description is\nclear and has sufficient instructions to be able to reproduce the issue.\n\nFacebook has a [bounty program](https://www.facebook.com/whitehat/) for the\nsafe disclosure of security bugs. In those cases, please go through the process\noutlined on that page and do not file a public issue.\n\n## Coding Style\n\nThis source code is formatted using `clang-format`, with project-specific rules\nrecorded in the `.clang-format` file.\n\n## License\n\nBy contributing to TensorPipe, you agree that your contributions will be\nlicensed under the LICENSE.txt file in the root directory of this source tree.\n"
  },
  {
    "path": "LICENSE.txt",
    "content": "BSD License\n\nFor TensorPipe software\n\nCopyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.\n\nRedistribution and use in source and binary forms, with or without modification,\nare permitted provided that the following conditions are met:\n\n * Redistributions of source code must retain the above copyright notice, this\n   list of conditions and the following disclaimer.\n\n * Redistributions in binary form must reproduce the above copyright notice,\n   this list of conditions and the following disclaimer in the documentation\n   and/or other materials provided with the distribution.\n\n * Neither the name Meta nor the names of its contributors may be used to\n   endorse or promote products derived from this software without specific\n   prior written permission.\n\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\" AND\nANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED\nWARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR\nANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES\n(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\nLOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON\nANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS\nSOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n"
  },
  {
    "path": "README.md",
    "content": "# TensorPipe\n\nThe TensorPipe project provides a tensor-aware channel to transfer rich objects\nfrom one process to another while using the fastest transport for the tensors\ncontained therein (e.g., CUDA device-to-device copy).\n\n> :warning: Update (2025-12) tensorpipe is in maintenance mode and no new changes are planned beyond minimal build fixes. Please see https://github.com/meta-pytorch/torchcomms and https://github.com/meta-pytorch/monarch for alternatives.\n\n## Getting started\n\nFirst clone the repository:\n\n```shell\n$ git clone --recursive https://github.com/pytorch/tensorpipe\n```\n\nThen, build as follows (using ninja instead of make):\n\n``` shell\n$ cd tensorpipe\n$ mkdir build\n$ cd build\n$ cmake ../ -GNinja\n$ ninja\n```\n\nYou can find test executables in `build/tensorpipe/test`.\n\n## Interface\n\nThere are four classes you need to know about:\n\n- `tensorpipe::Context`, which keeps track of the global state of the system,\n  such as thread pools, open file descriptors, etc.\n- `tensorpipe::Listener`, which allows one process to open an entry point for\n  other processes to connect to.\n- `tensorpipe::Pipe`, the one communication primitive that this entire project\n  is about. You can obtain one either by connecting to the listener of another\n  process or from such a listener when another process connects to it. Once you\n  have a pipe, you can send messages on it, and that's the whole point.\n- `tensorpipe::Message`, which is the the language that pipes read and write in.\n  Pipes are streams of structured messages (not just raw byte buffers), and a\n  message is composed of a \"core\" payload (memory living on CPU) plus a list of\n  tensors (memory living on any device, like GPUs).\n\nSending a message from one end of the pipe to the other can be achieved using\nthe `write` method, which takes a message (with the data to send) and a\ncallback which will be invoked once the sending has completed. 
This callback\nwill be invoked with an error (if one happened) and with the message.\n\nReceiving a message takes two steps: on an incoming message, first the pipe\nasks you to provide some memory to hold the message in, and then you ask the\npipe to read the data into that memory. In order to do this, first you must\nregister a callback that will be notified for incoming messages. This is\nperformed by calling the `readDescriptor` method with said callback. The\ncallback will be invoked with a so-called descriptor, which can be seen as a\n\"message skeleton\", i.e., a message with no buffers attached to it (they are\nset to null pointers). The job of this callback is filling in those buffers,\neither by allocating the required memory or by obtaining it from somewhere else\n(from a cache, as a slice of a batch that's being assembled, ...). This\ndescriptor also contains some metadata, given by the sender, which can be used\nto provide allocation hints or any other information that can help the receiver\ndetermine where to store the data. Once the message's buffers are ready, you\ncan tell the pipe to go ahead and fill them in with the incoming data by\npassing the message to the `read` method, together with a callback which will\nbe called when all the data has been received and stored. As when writing, this\ncallback will be given a (possibly empty) error and the original message. The\n`readDescriptor` callback is one-shot, which means that after it fires it\n\"expires\" and will not be called again. It must be re-armed for a new event to\nbe received.\n\nWhen you pass a message to the pipe, to send it or to receive into it, you must\nnot tamper with the underlying memory until the callback has completed, even if\nthe `write` or `read` call already returned. (The `write` and `read` calls, and\nall other calls, are non-blocking so that it's easier to schedule asynchronous\nparallel transfers without having to use threads). 
This means you can not deallocate\nthe memory or alter it in any way, as the pipe may still be reading or\nmodifying it. In other terms, you relinquish control over the memory when you\npass a message to the pipe, only to reacquire it once the message is given back\nto you in the callback. This contract is encoded by the requirement to move the\nmessages into and out of the pipe (using rvalue references). Also, because of\nthis agreement, all callbacks will always be called, even if the pipe is closed\nor if it errors, in order to give back the memory.\n\nThe order in which messages are written to a pipe is preserved when these\nmessages are read on the other side. Moreover, for a given pipe endpoint, the\ncallbacks of the performed operations are executed in the same order that these\noperations were scheduled, even if the operations are performed asynchronously\nor out-of-band and thus may overlap or occur out of order. What this means is\nthat if two write operations are scheduled one after the other back-to-back,\neven if the second one completes before the first one, its callback is delayed\nuntil the first one also completes and its callback is invoked. The same\napplies for reads. All the callbacks of all the pipes in a given context are\ncalled from the same per-context thread and thus no two callbacks will occur at\nthe same time. However, different contexts will use different threads and their\ncallbacks may thus overlap.\n\nAll the callbacks are invoked with an error reference. This may be \"empty\",\ni.e., indicate that no error has in fact occurred. In this case, the error\nobject evaluates to false. In case of an actual error it will instead evaluate\nto true. When invoked with an error, the remaining arguments of the callback\nmay be meaningless. For the `read` and `write` callbacks they will still\ncontain the message that these methods will be invoked with, but the\n`readDescriptor` one will be an empty or invalid message. 
It should not be\nused.\n\nThere is no expectation for the `readDescriptor` callback to be armed at all\ntimes. Similarly, it is not necessary to call the `read` method immediately\nafter a descriptor has been read. Both these possibilities are by design, in\norder to allow the user of the pipe to apply some backpressure in case it's\nreceiving messages at a faster rate than it can handle, or for any other\nreason. This backpressure will be propagated to the lower-level components\nas far down as possible (e.g., by stopping listening for readability events on\nthe socket file descriptor).\n\n## Transports and channels\n\nTensorPipe aims to be \"backend-agnostic\": it doesn't want to be restricted to a\nsingle way of copying data around but wants to be able to choose the fastest\nmedium from a library of backends, based on the circumstances (e.g., are the two\nprocesses on the same machine?) and on the available hardware (e.g., are the\nGPUs connected with NVLink?). TensorPipe strives to have the largest selection\nof backends, enabling users to implement specific backends for their systems\n(should the default ones prove limited) and encouraging contributions.\n\nThe two processes that are establishing a pipe will automatically negotiate\nduring setup to determine which of the backends they have at their disposal can\nbe used and how well they would perform, in order to choose the best one in a\nway that is completely transparent to the user.\n\nBackends come in two flavors:\n\n- Transports are the connections used by the pipes to transfer control messages,\n  and the (smallish) core payloads. They are meant to be lightweight and\n  low-latency. The most basic transport is a simple TCP one, which should work\n  in all scenarios. 
A more optimized one, for example, is based on a ring buffer\n  allocated in shared memory, which two processes on the same machine can use to\n  communicate by performing just a memory copy, without passing through the\n  kernel.\n\n- Channels are where the heavy lifting takes place, as they take care of copying\n  the (larger) tensor data. High bandwidths are a requirement. Examples include\n  multiplexing chunks of data across multiple TCP sockets and processes, so as to\n  saturate the NIC's bandwidth. Or using a CUDA memcpy call to transfer memory\n  from one GPU to another using NVLink.\n\nThese different usage patterns promote different design choices when\nimplementing transports and channels, which means the two are not perfectly\ninterchangeable. For example, a TCP-based transport is best implemented using a\nsingle connection, whereas a TCP-based channel will benefit from using multiple\nconnections and chunk and multiplex the payload over them in order to saturate\nthe bandwidth even on the most powerful NICs.\n\nMoreover, the APIs of transports and channels put different constraints on\nthem, which demand and permit different approaches. As a rule of thumb, we\nrequire more from the transports: the only out-of-band information they can use\nis a simple address, which is all they can use to bootstrap the connection, and\nthey need to include some \"signaling\" capabilities (a write on one side \"wakes\nup\" the other side by causing a read). Channels, on the other hand, have much\nlooser requirements: they basically just need to implement a `memcpy` and, for\nanything beyond that, they can leverage a transport that the pipe gives to them\nfor support.\n\n## License\n\nTensorPipe is BSD licensed, as found in the [LICENSE.txt](LICENSE.txt) file.\n"
  },
  {
    "path": "cmake/FindPackageHandleStandardArgs.cmake",
    "content": "# Copyright 2000-2020 Kitware, Inc. and Contributors\n# All rights reserved.\n#\n# Distributed under the OSI-approved BSD 3-Clause License. See\n# https://cmake.org/licensing for details.\n\n#[=======================================================================[.rst:\nFindPackageHandleStandardArgs\n-----------------------------\n\nThis module provides a function intended to be used in :ref:`Find Modules`\nimplementing :command:`find_package(<PackageName>)` calls.  It handles the\n``REQUIRED``, ``QUIET`` and version-related arguments of ``find_package``.\nIt also sets the ``<PackageName>_FOUND`` variable.  The package is\nconsidered found if all variables listed contain valid results, e.g.\nvalid filepaths.\n\n.. command:: find_package_handle_standard_args\n\n  There are two signatures::\n\n    find_package_handle_standard_args(<PackageName>\n      (DEFAULT_MSG|<custom-failure-message>)\n      <required-var>...\n      )\n\n    find_package_handle_standard_args(<PackageName>\n      [FOUND_VAR <result-var>]\n      [REQUIRED_VARS <required-var>...]\n      [VERSION_VAR <version-var>]\n      [HANDLE_COMPONENTS]\n      [CONFIG_MODE]\n      [FAIL_MESSAGE <custom-failure-message>]\n      )\n\n  The ``<PackageName>_FOUND`` variable will be set to ``TRUE`` if all\n  the variables ``<required-var>...`` are valid and any optional\n  constraints are satisfied, and ``FALSE`` otherwise.  A success or\n  failure message may be displayed based on the results and on\n  whether the ``REQUIRED`` and/or ``QUIET`` option was given to\n  the :command:`find_package` call.\n\n  The options are:\n\n  ``(DEFAULT_MSG|<custom-failure-message>)``\n    In the simple signature this specifies the failure message.\n    Use ``DEFAULT_MSG`` to ask for a default message to be computed\n    (recommended).  Not valid in the full signature.\n\n  ``FOUND_VAR <result-var>``\n    Obsolete.  Specifies either ``<PackageName>_FOUND`` or\n    ``<PACKAGENAME>_FOUND`` as the result variable.  
This exists only\n    for compatibility with older versions of CMake and is now ignored.\n    Result variables of both names are always set for compatibility.\n\n  ``REQUIRED_VARS <required-var>...``\n    Specify the variables which are required for this package.\n    These may be named in the generated failure message asking the\n    user to set the missing variable values.  Therefore these should\n    typically be cache entries such as ``FOO_LIBRARY`` and not output\n    variables like ``FOO_LIBRARIES``.\n\n  ``VERSION_VAR <version-var>``\n    Specify the name of a variable that holds the version of the package\n    that has been found.  This version will be checked against the\n    (potentially) specified required version given to the\n    :command:`find_package` call, including its ``EXACT`` option.\n    The default messages include information about the required\n    version and the version which has been actually found, both\n    if the version is ok or not.\n\n  ``HANDLE_COMPONENTS``\n    Enable handling of package components.  In this case, the command\n    will report which components have been found and which are missing,\n    and the ``<PackageName>_FOUND`` variable will be set to ``FALSE``\n    if any of the required components (i.e. not the ones listed after\n    the ``OPTIONAL_COMPONENTS`` option of :command:`find_package`) are\n    missing.\n\n  ``CONFIG_MODE``\n    Specify that the calling find module is a wrapper around a\n    call to ``find_package(<PackageName> NO_MODULE)``.  This implies\n    a ``VERSION_VAR`` value of ``<PackageName>_VERSION``.  The command\n    will automatically check whether the package configuration file\n    was found.\n\n  ``FAIL_MESSAGE <custom-failure-message>``\n    Specify a custom failure message instead of using the default\n    generated message.  Not recommended.\n\nExample for the simple signature:\n\n.. 
code-block:: cmake\n\n  find_package_handle_standard_args(LibXml2 DEFAULT_MSG\n    LIBXML2_LIBRARY LIBXML2_INCLUDE_DIR)\n\nThe ``LibXml2`` package is considered to be found if both\n``LIBXML2_LIBRARY`` and ``LIBXML2_INCLUDE_DIR`` are valid.\nThen also ``LibXml2_FOUND`` is set to ``TRUE``.  If it is not found\nand ``REQUIRED`` was used, it fails with a\n:command:`message(FATAL_ERROR)`, independent whether ``QUIET`` was\nused or not.  If it is found, success will be reported, including\nthe content of the first ``<required-var>``.  On repeated CMake runs,\nthe same message will not be printed again.\n\nExample for the full signature:\n\n.. code-block:: cmake\n\n  find_package_handle_standard_args(LibArchive\n    REQUIRED_VARS LibArchive_LIBRARY LibArchive_INCLUDE_DIR\n    VERSION_VAR LibArchive_VERSION)\n\nIn this case, the ``LibArchive`` package is considered to be found if\nboth ``LibArchive_LIBRARY`` and ``LibArchive_INCLUDE_DIR`` are valid.\nAlso the version of ``LibArchive`` will be checked by using the version\ncontained in ``LibArchive_VERSION``.  Since no ``FAIL_MESSAGE`` is given,\nthe default messages will be printed.\n\nAnother example for the full signature:\n\n.. code-block:: cmake\n\n  find_package(Automoc4 QUIET NO_MODULE HINTS /opt/automoc4)\n  find_package_handle_standard_args(Automoc4  CONFIG_MODE)\n\nIn this case, a ``FindAutomoc4.cmake`` module wraps a call to\n``find_package(Automoc4 NO_MODULE)`` and adds an additional search\ndirectory for ``automoc4``.  
Then the call to\n``find_package_handle_standard_args`` produces a proper success/failure\nmessage.\n#]=======================================================================]\n\ninclude(${CMAKE_CURRENT_LIST_DIR}/FindPackageMessage.cmake)\n\n# internal helper macro\nmacro(_FPHSA_FAILURE_MESSAGE _msg)\n  if (${_NAME}_FIND_REQUIRED)\n    message(FATAL_ERROR \"${_msg}\")\n  else ()\n    if (NOT ${_NAME}_FIND_QUIETLY)\n      message(STATUS \"${_msg}\")\n    endif ()\n  endif ()\nendmacro()\n\n\n# internal helper macro to generate the failure message when used in CONFIG_MODE:\nmacro(_FPHSA_HANDLE_FAILURE_CONFIG_MODE)\n  # <name>_CONFIG is set, but FOUND is false, this means that some other of the REQUIRED_VARS was not found:\n  if(${_NAME}_CONFIG)\n    _FPHSA_FAILURE_MESSAGE(\"${FPHSA_FAIL_MESSAGE}: missing:${MISSING_VARS} (found ${${_NAME}_CONFIG} ${VERSION_MSG})\")\n  else()\n    # If _CONSIDERED_CONFIGS is set, the config-file has been found, but no suitable version.\n    # List them all in the error message:\n    if(${_NAME}_CONSIDERED_CONFIGS)\n      set(configsText \"\")\n      list(LENGTH ${_NAME}_CONSIDERED_CONFIGS configsCount)\n      math(EXPR configsCount \"${configsCount} - 1\")\n      foreach(currentConfigIndex RANGE ${configsCount})\n        list(GET ${_NAME}_CONSIDERED_CONFIGS ${currentConfigIndex} filename)\n        list(GET ${_NAME}_CONSIDERED_VERSIONS ${currentConfigIndex} version)\n        string(APPEND configsText \"    ${filename} (version ${version})\\n\")\n      endforeach()\n      if (${_NAME}_NOT_FOUND_MESSAGE)\n        string(APPEND configsText \"    Reason given by package: ${${_NAME}_NOT_FOUND_MESSAGE}\\n\")\n      endif()\n      _FPHSA_FAILURE_MESSAGE(\"${FPHSA_FAIL_MESSAGE} ${VERSION_MSG}, checked the following files:\\n${configsText}\")\n\n    else()\n      # Simple case: No Config-file was found at all:\n      _FPHSA_FAILURE_MESSAGE(\"${FPHSA_FAIL_MESSAGE}: found neither ${_NAME}Config.cmake nor ${_NAME_LOWER}-config.cmake 
${VERSION_MSG}\")\n    endif()\n  endif()\nendmacro()\n\n\nfunction(FIND_PACKAGE_HANDLE_STANDARD_ARGS _NAME _FIRST_ARG)\n\n# Set up the arguments for `cmake_parse_arguments`.\n  set(options  CONFIG_MODE  HANDLE_COMPONENTS)\n  set(oneValueArgs  FAIL_MESSAGE  VERSION_VAR  FOUND_VAR)\n  set(multiValueArgs REQUIRED_VARS)\n\n# Check whether we are in 'simple' or 'extended' mode:\n  set(_KEYWORDS_FOR_EXTENDED_MODE  ${options} ${oneValueArgs} ${multiValueArgs} )\n  list(FIND _KEYWORDS_FOR_EXTENDED_MODE \"${_FIRST_ARG}\" INDEX)\n\n  if(${INDEX} EQUAL -1)\n    set(FPHSA_FAIL_MESSAGE ${_FIRST_ARG})\n    set(FPHSA_REQUIRED_VARS ${ARGN})\n    set(FPHSA_VERSION_VAR)\n  else()\n    cmake_parse_arguments(FPHSA \"${options}\" \"${oneValueArgs}\" \"${multiValueArgs}\"  ${_FIRST_ARG} ${ARGN})\n\n    if(FPHSA_UNPARSED_ARGUMENTS)\n      message(FATAL_ERROR \"Unknown keywords given to FIND_PACKAGE_HANDLE_STANDARD_ARGS(): \\\"${FPHSA_UNPARSED_ARGUMENTS}\\\"\")\n    endif()\n\n    if(NOT FPHSA_FAIL_MESSAGE)\n      set(FPHSA_FAIL_MESSAGE  \"DEFAULT_MSG\")\n    endif()\n\n    # In config-mode, we rely on the variable <package>_CONFIG, which is set by find_package()\n    # when it successfully found the config-file, including version checking:\n    if(FPHSA_CONFIG_MODE)\n      list(INSERT FPHSA_REQUIRED_VARS 0 ${_NAME}_CONFIG)\n      list(REMOVE_DUPLICATES FPHSA_REQUIRED_VARS)\n      set(FPHSA_VERSION_VAR ${_NAME}_VERSION)\n    endif()\n\n    if(NOT FPHSA_REQUIRED_VARS)\n      message(FATAL_ERROR \"No REQUIRED_VARS specified for FIND_PACKAGE_HANDLE_STANDARD_ARGS()\")\n    endif()\n  endif()\n\n# now that we collected all arguments, process them\n\n  if(\"x${FPHSA_FAIL_MESSAGE}\" STREQUAL \"xDEFAULT_MSG\")\n    set(FPHSA_FAIL_MESSAGE \"Could NOT find ${_NAME}\")\n  endif()\n\n  list(GET FPHSA_REQUIRED_VARS 0 _FIRST_REQUIRED_VAR)\n\n  string(TOUPPER ${_NAME} _NAME_UPPER)\n  string(TOLOWER ${_NAME} _NAME_LOWER)\n\n  if(FPHSA_FOUND_VAR)\n    if(FPHSA_FOUND_VAR MATCHES \"^${_NAME}_FOUND$\"  OR  
FPHSA_FOUND_VAR MATCHES \"^${_NAME_UPPER}_FOUND$\")\n      set(_FOUND_VAR ${FPHSA_FOUND_VAR})\n    else()\n      message(FATAL_ERROR \"The argument for FOUND_VAR is \\\"${FPHSA_FOUND_VAR}\\\", but only \\\"${_NAME}_FOUND\\\" and \\\"${_NAME_UPPER}_FOUND\\\" are valid names.\")\n    endif()\n  else()\n    set(_FOUND_VAR ${_NAME_UPPER}_FOUND)\n  endif()\n\n  # collect all variables which were not found, so they can be printed, so the\n  # user knows better what went wrong (#6375)\n  set(MISSING_VARS \"\")\n  set(DETAILS \"\")\n  # check if all passed variables are valid\n  set(FPHSA_FOUND_${_NAME} TRUE)\n  foreach(_CURRENT_VAR ${FPHSA_REQUIRED_VARS})\n    if(NOT ${_CURRENT_VAR})\n      set(FPHSA_FOUND_${_NAME} FALSE)\n      string(APPEND MISSING_VARS \" ${_CURRENT_VAR}\")\n    else()\n      string(APPEND DETAILS \"[${${_CURRENT_VAR}}]\")\n    endif()\n  endforeach()\n  if(FPHSA_FOUND_${_NAME})\n    set(${_NAME}_FOUND TRUE)\n    set(${_NAME_UPPER}_FOUND TRUE)\n  else()\n    set(${_NAME}_FOUND FALSE)\n    set(${_NAME_UPPER}_FOUND FALSE)\n  endif()\n\n  # component handling\n  unset(FOUND_COMPONENTS_MSG)\n  unset(MISSING_COMPONENTS_MSG)\n\n  if(FPHSA_HANDLE_COMPONENTS)\n    foreach(comp ${${_NAME}_FIND_COMPONENTS})\n      if(${_NAME}_${comp}_FOUND)\n\n        if(NOT DEFINED FOUND_COMPONENTS_MSG)\n          set(FOUND_COMPONENTS_MSG \"found components: \")\n        endif()\n        string(APPEND FOUND_COMPONENTS_MSG \" ${comp}\")\n\n      else()\n\n        if(NOT DEFINED MISSING_COMPONENTS_MSG)\n          set(MISSING_COMPONENTS_MSG \"missing components: \")\n        endif()\n        string(APPEND MISSING_COMPONENTS_MSG \" ${comp}\")\n\n        if(${_NAME}_FIND_REQUIRED_${comp})\n          set(${_NAME}_FOUND FALSE)\n          string(APPEND MISSING_VARS \" ${comp}\")\n        endif()\n\n      endif()\n    endforeach()\n    set(COMPONENT_MSG \"${FOUND_COMPONENTS_MSG} ${MISSING_COMPONENTS_MSG}\")\n    string(APPEND DETAILS \"[c${COMPONENT_MSG}]\")\n  endif()\n\n  # version 
handling:\n  set(VERSION_MSG \"\")\n  set(VERSION_OK TRUE)\n\n  # check with DEFINED here as the requested or found version may be \"0\"\n  if (DEFINED ${_NAME}_FIND_VERSION)\n    if(DEFINED ${FPHSA_VERSION_VAR})\n      set(_FOUND_VERSION ${${FPHSA_VERSION_VAR}})\n\n      if(${_NAME}_FIND_VERSION_EXACT)       # exact version required\n        # count the dots in the version string\n        string(REGEX REPLACE \"[^.]\" \"\" _VERSION_DOTS \"${_FOUND_VERSION}\")\n        # add one dot because there is one dot more than there are components\n        string(LENGTH \"${_VERSION_DOTS}.\" _VERSION_DOTS)\n        if (_VERSION_DOTS GREATER ${_NAME}_FIND_VERSION_COUNT)\n          # Because of the C++ implementation of find_package() ${_NAME}_FIND_VERSION_COUNT\n          # is at most 4 here. Therefore a simple lookup table is used.\n          if (${_NAME}_FIND_VERSION_COUNT EQUAL 1)\n            set(_VERSION_REGEX \"[^.]*\")\n          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 2)\n            set(_VERSION_REGEX \"[^.]*\\\\.[^.]*\")\n          elseif (${_NAME}_FIND_VERSION_COUNT EQUAL 3)\n            set(_VERSION_REGEX \"[^.]*\\\\.[^.]*\\\\.[^.]*\")\n          else ()\n            set(_VERSION_REGEX \"[^.]*\\\\.[^.]*\\\\.[^.]*\\\\.[^.]*\")\n          endif ()\n          string(REGEX REPLACE \"^(${_VERSION_REGEX})\\\\..*\" \"\\\\1\" _VERSION_HEAD \"${_FOUND_VERSION}\")\n          unset(_VERSION_REGEX)\n          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _VERSION_HEAD)\n            set(VERSION_MSG \"Found unsuitable version \\\"${_FOUND_VERSION}\\\", but required is exact version \\\"${${_NAME}_FIND_VERSION}\\\"\")\n            set(VERSION_OK FALSE)\n          else ()\n            set(VERSION_MSG \"(found suitable exact version \\\"${_FOUND_VERSION}\\\")\")\n          endif ()\n          unset(_VERSION_HEAD)\n        else ()\n          if (NOT ${_NAME}_FIND_VERSION VERSION_EQUAL _FOUND_VERSION)\n            set(VERSION_MSG \"Found unsuitable version \\\"${_FOUND_VERSION}\\\", 
but required is exact version \\\"${${_NAME}_FIND_VERSION}\\\"\")\n            set(VERSION_OK FALSE)\n          else ()\n            set(VERSION_MSG \"(found suitable exact version \\\"${_FOUND_VERSION}\\\")\")\n          endif ()\n        endif ()\n        unset(_VERSION_DOTS)\n\n      else()     # minimum version specified:\n        if (${_NAME}_FIND_VERSION VERSION_GREATER _FOUND_VERSION)\n          set(VERSION_MSG \"Found unsuitable version \\\"${_FOUND_VERSION}\\\", but required is at least \\\"${${_NAME}_FIND_VERSION}\\\"\")\n          set(VERSION_OK FALSE)\n        else ()\n          set(VERSION_MSG \"(found suitable version \\\"${_FOUND_VERSION}\\\", minimum required is \\\"${${_NAME}_FIND_VERSION}\\\")\")\n        endif ()\n      endif()\n\n    else()\n\n      # if the package was not found, but a version was given, add that to the output:\n      if(${_NAME}_FIND_VERSION_EXACT)\n         set(VERSION_MSG \"(Required is exact version \\\"${${_NAME}_FIND_VERSION}\\\")\")\n      else()\n         set(VERSION_MSG \"(Required is at least version \\\"${${_NAME}_FIND_VERSION}\\\")\")\n      endif()\n\n    endif()\n  else ()\n    # Check with DEFINED as the found version may be 0.\n    if(DEFINED ${FPHSA_VERSION_VAR})\n      set(VERSION_MSG \"(found version \\\"${${FPHSA_VERSION_VAR}}\\\")\")\n    endif()\n  endif ()\n\n  if(VERSION_OK)\n    string(APPEND DETAILS \"[v${${FPHSA_VERSION_VAR}}(${${_NAME}_FIND_VERSION})]\")\n  else()\n    set(${_NAME}_FOUND FALSE)\n  endif()\n\n\n  # print the result:\n  if (${_NAME}_FOUND)\n    FIND_PACKAGE_MESSAGE(${_NAME} \"Found ${_NAME}: ${${_FIRST_REQUIRED_VAR}} ${VERSION_MSG} ${COMPONENT_MSG}\" \"${DETAILS}\")\n  else ()\n\n    if(FPHSA_CONFIG_MODE)\n      _FPHSA_HANDLE_FAILURE_CONFIG_MODE()\n    else()\n      if(NOT VERSION_OK)\n        _FPHSA_FAILURE_MESSAGE(\"${FPHSA_FAIL_MESSAGE}: ${VERSION_MSG} (found ${${_FIRST_REQUIRED_VAR}})\")\n      else()\n        _FPHSA_FAILURE_MESSAGE(\"${FPHSA_FAIL_MESSAGE} (missing:${MISSING_VARS}) 
${VERSION_MSG}\")\n      endif()\n    endif()\n\n  endif ()\n\n  set(${_NAME}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)\n  set(${_NAME_UPPER}_FOUND ${${_NAME}_FOUND} PARENT_SCOPE)\nendfunction()\n"
  },
  {
    "path": "cmake/FindPackageMessage.cmake",
    "content": "# Copyright 2000-2020 Kitware, Inc. and Contributors\n# All rights reserved.\n#\n# Distributed under the OSI-approved BSD 3-Clause License. See\n# https://cmake.org/licensing for details.\n\n#.rst:\n# FindPackageMessage\n# ------------------\n#\n#\n#\n# FIND_PACKAGE_MESSAGE(<name> \"message for user\" \"find result details\")\n#\n# This macro is intended to be used in FindXXX.cmake modules files.  It\n# will print a message once for each unique find result.  This is useful\n# for telling the user where a package was found.  The first argument\n# specifies the name (XXX) of the package.  The second argument\n# specifies the message to display.  The third argument lists details\n# about the find result so that if they change the message will be\n# displayed again.  The macro also obeys the QUIET argument to the\n# find_package command.\n#\n# Example:\n#\n# ::\n#\n#   if(X11_FOUND)\n#     FIND_PACKAGE_MESSAGE(X11 \"Found X11: ${X11_X11_LIB}\"\n#       \"[${X11_X11_LIB}][${X11_INCLUDE_DIR}]\")\n#   else()\n#    ...\n#   endif()\n\nfunction(FIND_PACKAGE_MESSAGE pkg msg details)\n  # Avoid printing a message repeatedly for the same find result.\n  if(NOT ${pkg}_FIND_QUIETLY)\n    string(REPLACE \"\\n\" \"\" details \"${details}\")\n    set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg})\n    if(NOT \"${details}\" STREQUAL \"${${DETAILS_VAR}}\")\n      # The message has not yet been printed.\n      message(STATUS \"${msg}\")\n\n      # Save the find details in the cache to avoid printing the same\n      # message again.\n      set(\"${DETAILS_VAR}\" \"${details}\"\n        CACHE INTERNAL \"Details about finding ${pkg}\")\n    endif()\n  endif()\nendfunction()\n"
  },
  {
    "path": "cmake/Finduv.cmake",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\n#\n# Finduv\n# ------\n#\n# Imported Targets\n# ^^^^^^^^^^^^^^^^\n#\n# An imported target named ``uv::uv`` is provided if libuv has been found.\n#\n# Result Variables\n# ^^^^^^^^^^^^^^^^\n#\n# This module defines the following variables:\n#\n# ``uv_FOUND``\n#   True if libuv was found, false otherwise.\n# ``uv_LIBRARY_DIRS``\n#   The path(s) to uv libraries.\n# ``uv_VERSION``\n#   The version of libuv found.\n#\n\nfind_package(PkgConfig QUIET)\n\nif((NOT TP_BUILD_LIBUV) AND PkgConfig_FOUND)\n  pkg_check_modules(uv QUIET IMPORTED_TARGET GLOBAL libuv)\n  if(uv_FOUND)\n    add_library(uv::uv ALIAS PkgConfig::uv)\n  endif()\nendif()\n\nif(NOT uv_FOUND)\n  set(uv_VERSION \"1.51.0\")\n  set(uv_LIBRARY_DIRS \"submodule\")\n\n  set(libuv_DIR ${PROJECT_SOURCE_DIR}/third_party/libuv)\n  add_subdirectory(${libuv_DIR}\n    ${PROJECT_BINARY_DIR}/third_party/libuv\n    EXCLUDE_FROM_ALL)\n\n  # This hack duplicates the `uv_a` target, so that we can call\n  # install(TARGETS ... 
EXPORT) on it, which is not possible when the target is\n  # defined in a subdirectory in CMake 3.5.\n  get_target_property(_uv_sources uv_a SOURCES)\n  set(_uv_sources_abs)\n  foreach(_uv_src ${_uv_sources})\n    list(APPEND _uv_sources_abs \"${libuv_DIR}/${_uv_src}\")\n  endforeach()\n\n  add_library(tensorpipe_uv STATIC ${_uv_sources_abs})\n  if(BUILD_SHARED_LIBS)\n    set_target_properties(tensorpipe_uv PROPERTIES POSITION_INDEPENDENT_CODE 1)\n  endif()\n\n  get_target_property(_link_libs uv_a LINK_LIBRARIES)\n  target_link_libraries(tensorpipe_uv PRIVATE ${_link_libs})\n\n  get_target_property(_include_dirs uv_a INCLUDE_DIRECTORIES)\n  target_include_directories(tensorpipe_uv PRIVATE ${_include_dirs})\n  target_include_directories(tensorpipe_uv PUBLIC $<BUILD_INTERFACE:${libuv_DIR}/include>)\n\n  get_target_property(_compile_definitions uv_a COMPILE_DEFINITIONS)\n  target_compile_definitions(tensorpipe_uv PRIVATE ${_compile_definitions})\n\n  get_target_property(_compile_options uv_a COMPILE_OPTIONS)\n  target_compile_options(tensorpipe_uv PRIVATE ${_compile_options})\n\n  install(TARGETS tensorpipe_uv\n          EXPORT TensorpipeTargets\n          ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR})\n\n  add_library(uv::uv ALIAS tensorpipe_uv)\nendif()\n\ninclude(FindPackageHandleStandardArgs)\nfind_package_handle_standard_args(uv\n  REQUIRED_VARS uv_VERSION\n  VERSION_VAR uv_VERSION)\n"
  },
  {
    "path": "cmake/MiscCheck.cmake",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\ninclude(CheckCXXSourceCompiles)\ninclude(CMakePushCheckState)\n\n# We use the [[nodiscard]] attribute, which GCC 5 complains about.\n# Silence this warning if GCC 5 is used.\nif(CMAKE_CXX_COMPILER_ID STREQUAL \"GNU\")\n  if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6)\n    add_definitions(\"-Wno-attributes\")\n  endif()\nendif()\n"
  },
  {
    "path": "cmake/Options.cmake",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\nif(CMAKE_SYSTEM_NAME STREQUAL \"Linux\")\n  set(LINUX ON)\nelse()\n  set(LINUX OFF)\nendif()\n\nmacro(TP_CONDITIONAL_BACKEND name docstring condition)\n  # No clue why this monstrosity is needed. But cmake_dependent_option has it,\n  # and the code doesn't seem to work without it.\n  string(REGEX REPLACE \" +\" \";\" TP_CONDITIONAL_BACKEND_CONDITION \"${condition}\")\n  if(${TP_CONDITIONAL_BACKEND_CONDITION})\n    set(TP_CONDITIONAL_BACKEND_CAN_ENABLE ON)\n  else()\n    set(TP_CONDITIONAL_BACKEND_CAN_ENABLE OFF)\n  endif()\n  set(${name} ${TP_CONDITIONAL_BACKEND_CAN_ENABLE} CACHE BOOL ${docstring})\n  if(${name} AND NOT ${TP_CONDITIONAL_BACKEND_CAN_ENABLE})\n    message(FATAL_ERROR \"${name} was explicitly set, but that can't be honored\")\n  endif()\nendmacro()\n\n# Try to auto-detect the presence of some libraries in order to enable/disable\n# the transports/channels that make use of them.\n# TODO Add CUDA to this list, in order to fix the TODO below\n\n# TODO: Default to ON if CUDA available.\noption(TP_USE_CUDA \"Enable support for CUDA tensors\" OFF)\n\n# Optional features\noption(TP_BUILD_BENCHMARK \"Build benchmarks\" OFF)\noption(TP_BUILD_MISC \"Build misc tools\" OFF)\noption(TP_BUILD_PYTHON \"Build python bindings\" OFF)\noption(TP_BUILD_TESTING \"Build tests\" OFF)\n\n# Whether to build a static or shared library\nif(BUILD_SHARED_LIBS)\n  set(TP_STATIC_OR_SHARED SHARED CACHE STRING \"\")\nelse()\n  set(TP_STATIC_OR_SHARED STATIC CACHE STRING \"\")\nendif()\nmark_as_advanced(TP_STATIC_OR_SHARED)\n\n# Force to build libuv from the included submodule\noption(TP_BUILD_LIBUV \"Build libuv from source\" OFF)\n\n# Directories\ninclude(GNUInstallDirs)\nset(TP_INSTALL_LIBDIR ${CMAKE_INSTALL_LIBDIR} CACHE STRING \"Directory in which to 
install libraries\")\nmark_as_advanced(TP_INSTALL_LIBDIR)\nset(TP_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE STRING \"Directory in which to install public headers\")\nmark_as_advanced(TP_INSTALL_INCLUDEDIR)\n"
  },
  {
    "path": "cmake/Sanitize.cmake",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\nif(SANITIZE)\n  add_definitions(\"-fsanitize=${SANITIZE}\")\n  add_definitions(\"-fno-omit-frame-pointer\")\n  set(CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS} -fsanitize=${SANITIZE}\")\nendif()\n"
  },
  {
    "path": "docs/cuda_gotchas.md",
    "content": "# CUDA gotchas\n\nWhile implementing CUDA channels we hit some undocumented \"quirks\" which forced us to adapt our original designs. We collect them here for future reference (although this list may not be exhaustive). Please add more items whenever we learn new things \"the hard way\". We’re mostly interested in unexpected behaviors that could entail substantial design changes, although smaller technical pitfalls are welcome too.\n\n## Most functions initialize a context on the current device\n\nA lot of CUDA functions cause a CUDA context to be initialized for the \"current\" device (which is a thread-local variable managed by CUDA). This consumes on-device memory (plus it can cause deadlocks when combined with NCCL). By invoking CUDA functions without first explicitly setting the current device we risk accidentally initializing CUDA contexts on devices on which we weren’t supposed to (especially device 0, since it’s the \"default\"). In order to avoid this, a device guard should be used for *all* operations. They are very cheap, hence don’t be shy! At times it’s not clear which device should be used in such guard, for example during initialization, however we must only use devices that the user has explicitly provided, hence we may have to lazily delay initialization in those cases.\n\n## Querying the device of a pointer can fail\n\nBy choice, TensorPipe doesn’t ask users to provide the device index when they pass in a CUDA pointer, for simplicity, since it would be redundant as the device index can be extracted from the pointer. This \"extraction\" is thus the only CUDA operation for which we can’t possibly set up a device guard. This has proven to be a problem because, due to a bug in CUDA, the extraction would fail if the current device had been *explicitly* set to an invalid (uninitialized) device. (A default \"unset\" current device would work). 
This occurred often, because if we used a device guard when the current device was unset, its destructor would explicitly reset the current device to 0. Our investigation seemed to show that an unset current device in the CUDA runtime corresponded to a null current context in the CUDA driver, whereas an invalid current device corresponded to an invalid non-null context. Thus our workaround was to use the driver API directly and first reset its current context to null (in a sense, use a \"reverse\" device guard, which temporarily \"unsets\" the current device).\n\n## Releasing shared resources implicitly synchronizes\n\nSome CUDA operations perform an implicit device synchronization: they block the CPU thread until the GPU \"catches up\", that is, it waits for *all* previously-launched kernels for that device (on any stream) to complete. Such functions also cause later kernels (enqueued by another concurrent thread) to delay their launch on the device until the blocking function returns (we’ve occasionally been calling this a \"kernel fence\"). This is bad because it would mean that an internal TensorPipe operation can interfere with the user’s scheduling of kernels and thus degrade GPU utilization. The [CUDA programming guide](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#implicit-synchronization) mentions such a behavior (in section 3.2.6.5.4), however we’ve found out that the list of circumstances where this occurs is incomplete and incorrect. As a rule of thumb, we’ve seen this behavior happen mainly when *releasing* a resource shared among kernels (e.g., device memory, pinned host memory, IPC memory handles), as if CUDA wanted to ensure there were no kernels using this resource anymore before freeing it. A mental model could be to imagine that kernels acquire a shared lock to it, while freeing it needs a unique lock. 
The only solution to this limitation is to allocate a pool of these resources at the beginning and reuse them.\n\n## Creating IPC events deadlocks\n\nAnother CUDA bug we hit was that the creation of CUDA events with the interprocess flag would sometimes deadlock. [Here’s a (not so small) repro](https://gist.github.com/lw/f34836416e7674bbdda8b4925c2999f2). We couldn’t pin it down to a specific condition, or to a race with another call. NVIDIA confirmed the bug and supposedly fixed it in version 450 of the CUDA driver. Since we still need to support earlier versions, as a workaround we’re taking great care to create all our IPC events as early as possible (hoping to avoid whatever races) and reuse them.\n\n## Memory won’t be freed if there’s open IPC handles to it\n\nImagine that process B has received and opened an IPC handle to some device memory allocated and owned by process A, and process A frees this memory without B first closing its handle to it. The CUDA doc described this as undefined behavior (hence we can’t complain), but in practice what we’ve observed is that the memory will *not* be freed, that is, it will not be reused for subsequent allocation requests, thus possibly causing OOMs. In a sense, it’s as if that memory were \"leaked\". This is displayed rather confusingly in `nvidia-smi`’s accounting: the memory appears as occupied in the device statistics, but no process appears to be responsible for it.\n\n## Cannot open same IPC handle more than once\n\nThere’s a limitation in older versions of CUDA where, if process A allocates some memory, only *one* binding to it can be opened in process B using IPC handles. Attempting to re-open the same handle a second time will fail. Note that one cannot get multiple \"different\" handles for the same memory, as CUDA always returns the same one. 
In practice it means that the user could pass some memory for TensorPipe for which it has already manually created and shared a handle, thus it’s unsafe for TensorPipe to also get and open a handle. We can only safely do it for private memory that we’re managing ourselves. Also note that this limitation was lifted in CUDA 11.1.\n\n## The pointer for an opened IPC handle could be \"offset\" wrt the source pointer\n\nThe CUDA doc on this is clear albeit cryptic: given a pointer, CUDA returns the IPC handle for its *allocation*. Hence if we allocate some memory at address p0 and ask for the IPC handle of address p1 = p0 + offset, we’ll get the IPC handle for p0! This means that when we open the handle we need to add back that offset. Luckily CUDA offers a function to query p0 given p1. Note that this situation happens a lot in PyTorch due to the caching allocator sometimes returning slices from larger blocks.\n\n## Not all pairs of GPUs can access each other’s memory\n\nDevice to device (D2D) transfers are supported by CUDA only when peer-to-peer (P2P) capabilities exist between the two GPUs. This is handled transparently by CUDA, which will automatically select the most performant direct link. Concretely, it will use NVLink, but only if there’s a dedicated \"cable\" connecting those two devices. If the NVLink mesh is not a complete graph (as is often the case, e.g., hybrid-cube meshes (HCM) are very common), for the missing pairs CUDA will use PCIe transfers, but only if the two devices are attached to the same chipset/controller/host bridge. If there are multiple chipsets (which is also common, e.g., the DGX machines have two), then D2D transfers between some pairs of GPUs might just not be possible through CUDA! 
In principle this is easy enough to detect since CUDA offers a function for it (and `nvidia-smi topo` also displays it), however we can’t use it if the two devices aren’t both \"visible\" to the process (we’re referring to the `CUDA_VISIBLE_DEVICES` environment variable). For such cases the only option is to use the NVML library, which doesn’t honor that env var, but in turn adds the complexity of matching corresponding devices between CUDA and NVML (which is best done through their UUID). Moreover, additional complexity was required in TensorPipe to handle the case where some but not all pairs of GPUs between two processes supported P2P.\n\n## Registering CUDA memory with IB is slow\n\nThis is kinda known, but it’s better to repeat it: the registration and deregistration of memory with InfiniBand is considered a \"setup\" step, and is very slow, and should thus be avoided as much as possible during the \"hot\" data path, for example using a staging area or by caching these registrations.\n\n## Registering CUDA memory with IB requires an extra NVIDIA kernel module\n\nWhen we pass a pointer to InfiniBand for registration, InfiniBand needs to understand that this virtual address points to CUDA device memory and not to some CPU memory. For that it needs to be aware of CUDA, and it does so through so-called \"peer memory client\", which NVIDIA provides (through a separate kernel module) and registers with InfiniBand, and which is queried by InfiniBand before \"falling back\" to assuming the pointer points to CPU memory. This peer memory client feature is only available in Mellanox’s InfiniBand distribution (called OFED, OpenFabrics Enterprise Distribution), and not in vanilla upstream InfiniBand. 
On the client side (our side) luckily nothing changes in the API.\n\n## Registering CUDA memory with IB occupies the PCIe window\n\nEach PCIe device has a handful of \"memory windows\" it exposes, through which the host or other devices can access and modify the device’s memory (both to issue commands and to send/retrieve data). These are called BARs (base address registers). In the case of NVIDIA GPUs the BAR that appears to map to the device’s main memory is BAR1. This is often sized much smaller than the memory itself (say, 256MB for a 16GB GPU), with the idea that it will just be used as a staging area. Also note that CUDA already reserves a few dozen MBs of that window. When registering CUDA device memory with InfiniBand, an additional mapping is created in that window (during the `ibv_reg_mr` call) and will thus fail if the window doesn’t have enough remaining space (e.g., if the buffer being registered is larger than the window). This means we can’t straightforwardly register the user-provided buffers. However, with the right combination of GPU and of CPU BIOS, the BAR1 can become as large as the GPU’s main memory itself, in which case this won’t be a problem anymore.\n\n## Registering CUDA memory with IB doesn’t leak it\n\nContrary to IPC handles, freeing CUDA device memory while it’s still registered with InfiniBand does not appear to interfere with the deallocation, hence the memory will correctly become reusable.\n\n## IB messages have a maximum size\n\nEach send/recv operation over InfiniBand can only handle up to a certain amount of data, usually at least 1GB, and will fail for larger amounts. This limit can be queried on the device, and chunking must be used for larger sizes.\n\nIt appears that, at least on some NICs and with some drivers, there's also a \"minimum size\" of 32 bytes, with messages failing with odd errors for smaller sizes. 
It's still unclear whether it's a bug.\n\n## GPUs need to be matched with the right IB NIC\n\nOn some machine types there may be multiple GPUs and multiple InfiniBand devices and they need to be carefully matched. Using the same IB NIC for all GPUs will introduce a bottleneck while leaving all other NICs unused. Matching them up \"randomly\" means that the data paths over PCIe of different GPU-NIC pairs might cross each other (thus, again, causing a bottleneck), might traverse the host, or otherwise interfere. These machines are usually set up so that each GPU has one NIC that it’s \"naturally\" closest to, for example they share the same PCIe switch, thus we need a logic to be able to detect and implement this.\n"
  },
  {
    "path": "docs/development.md",
    "content": "# Development\n\nTensorPipe uses CMake for its build system.\n\n## Dependencies\n\nTo build TensorPipe, you need:\n\n* C++14 compatible compiler (GCC >= 5.5 or Clang >= 6)\n\n## Clone the repository\n\nExample:\n\n``` shell\ngit clone --recursive https://github.com/pytorch/tensorpipe\n```\n\nIf you have updated an already cloned repository, make sure that the\nsubmodules are up to date:\n\n``` shell\ngit submodule sync\ngit submodule update --init\n```\n\nIt is imperative to check out the submodules before running CMake.\n\nFind the list of submodules and a description of what they're used for\non [this page][third_party].\n\n[third_party]: https://github.com/pytorch/tensorpipe/tree/main/third_party\n\n## Using CMake\n\nExample:\n\n``` shell\nmkdir build\ncd build\ncmake ../ -DCMAKE_BUILD_TYPE=Debug -DSANITIZE=thread\nmake\n```\n\nYou can specify CMake variables by passing them as arguments to the `cmake` command.\n\nUseful CMake variables:\n\n* `CMAKE_C_COMPILER` -- Define which C compiler to use.\n* `CMAKE_CXX_COMPILER` -- Define which C++ compiler to use.\n* `CMAKE_C_FLAGS` -- Additional flags for the C compiler.\n* `CMAKE_CXX_FLAGS` -- Additional flags for the C++ compiler.\n* `CMAKE_BUILD_TYPE` -- For example: `release`, `debug`.\n\nUseful TensorPipe specific variables:\n\n* `SANITIZE` -- configure the sanitizer to use (if any); for\n  example: `address` or `thread`, to run with `asan` or `tsan`,\n  respectively.\n\n## Ninja\n\nTo make CMake output something other than the default `Makefile`, see\n[`cmake-generators(7)`][cmake-generators]. We like to use the\n[Ninja][ninja] generator because it works well for incremental builds.\nOn the command line, specify `-GNinja` to use it.\n\n[cmake-generators]: https://cmake.org/cmake/help/v3.4/manual/cmake-generators.7.html\n[ninja]: https://en.wikipedia.org/wiki/Ninja_(build_system)\n"
  },
  {
    "path": "docs/linux_support.md",
    "content": "This document is intended for developers and advanced users. It’s the kind of document that risks going out of date very quickly, hence take it with a grain of salt.\n\nIn order to try to be as performant as possible, TensorPipe sometimes relies on new and advanced kernel features. This is causing issues to users who are building and/or running on old kernels. Hence, whenever we use such features, we should always “guard” them somehow, i.e., detect their availability at compile-time or (preferably) at runtime, and disable the backend or mark it non-viable. It is ok-ish for users with old kernels to not have access to all backends, as long as there’s always at least one backend they can use.\n\n## Compile-time vs runtime, Linux vs glibc\n\nUnfortunately, both the kernel version used for building and the one used for running affect whether we can use a feature. This means that the availability of a function or flag during build doesn’t mean it will be supported at runtime (this is especially true for the official builds of PyTorch). On the other hand, it also means that even if the runtime kernel supports a feature, we may not be able to use it because we didn’t have access to a system header when building (e.g., to get a flag). While sometimes we can “polyfill” this information, it’s not always doable.\n\nAn additional complication is added by the fact that we typically access syscalls through their glibc wrappers. First of all, this means we only get access to a syscall once glibc wraps it, which could happen years later. But it also means we link to a glibc symbol, and thus to a specific version of glibc’s shared object. With the kernel, using an unsupported feature results in a runtime error when first used, which we can catch; but with glibc we get a loader error due to missing symbols at startup, even if the user doesn’t use TensorPipe, even if we could “tolerate” these symbols’ absence. 
It is thus desirable at times to avoid the glibc wrappers.\n\n## Common tricks for how to guard/polyfill\n\n* Kernel flags are typically defined as preprocessor flags (i.e., `#define FOO`). This is stuff like `O_TMPFILE`, `MAP_SHARED_VALIDATE`, `PR_SET_PTRACER`, ... It’s easy to detect this in the code, with a `#ifdef FOO`, and since these flags are (usually?) constants, it’s also easy to define them ourselves. This “polyfill” allows us to build on an old kernel but still run on a new one.\n* For a new-ish syscall, we probably don’t want to use the glibc wrapper, for the problems described above, and because it’s hard to detect its availability (the best option is a CMake check whose result we inject as a preprocessor flag). An alternative is to invoke it through the generic `syscall` syscall, using the `SYS_foo` flags. This could bring a few issues on its own (especially for 32bit systems) but for now it hasn’t come to bite us. This way we skip glibc entirely, and simply end up getting ENOSYS if the runtime kernel doesn’t support the syscall. Those `SYS_foo` flags are defined by glibc, but it seems glibc defines them automatically for all the syscalls it “finds” in the kernel, and not just for the syscalls that glibc supports. Unfortunately we cannot “polyfill” the `SYS_foo` flags if we don’t find them, because they have different values on different architectures.\n\n## What do others do?\n\nSince [Apr 2017](https://github.com/libuv/libuv/commit/4e6101388015c6d0879308d566f0a4b79edc0c13), libuv only supports Linux 2.6.32 (December 2009) and glibc 2.12 (May 2010). (This doesn’t mean earlier versions are necessarily broken, but that libuv reserves the right to break them). Libuv seems to be somewhat tied to the RedHat/CentOS releases, which are common and have a very long lifespan. 
It doesn’t make sense for us to support older versions than what libuv does, because if libuv decides to break them there’s nothing we can do.\n\nPyTorch tries to support the [manylinux2014 platform](https://www.python.org/dev/peps/pep-0599/) (defined by Python for use in PyPI/pip), which allows up to glibc 2.17 (December 2012). However, it’s not clear if we’re there yet, and the previous version is `manylinux2010` which comes with glibc 2.12.\n\nHence a reasonable recommendation seems to be to draw the line at Linux 2.6.32 and glibc 2.12. However, people with older versions than those have already reported issues and asked for fixes, which we can probably consider on a case-by-case basis.\n\n## Kernel features used by TensorPipe\n\n### Linux 2.1.4 (October 1996)\n\n* The `getresuid` and `getresgid` syscalls.\n\n### Linux 2.3.16 (September 1999)\n\n* The `/proc/sys/kernel/random/boot_id` file. See `random(4)`.\n\n  No git hash as it predates the use of git by Linux\n\n  https://github.com/torvalds/linux/blob/1da177e4c3f41524e886b7f1b8a0c1fc7321cac2/drivers/char/random.c#L1270-L1278\n\n### Linux 2.3.20 (October 1999)\n\n* The `PR_GET_DUMPABLE` flag for `prctl`.\n\n  No git hash as it predates the use of git by Linux\n\n  https://github.com/torvalds/linux/blob/1da177e4c3f41524e886b7f1b8a0c1fc7321cac2/include/linux/prctl.h#L10\n\n### Linux 2.6.26 (July 2008)\n\n* Version 3 of Linux capabilities. (Initial capability support, including the `capget` syscall, dates back to Linux 2.1.100, from May 1998). See `capget(2)`.\n\n  https://github.com/torvalds/linux/commit/ca05a99a54db1db5bca72eccb5866d2a86f8517f\n\n### Linux 3.2 (January 2012)\n\n* Cross-Memory Attach (i.e., the `process_vm_readv` syscall). See `process_vm_readv(2)`.\n\n  https://github.com/torvalds/linux/commit/fcf634098c00dd9cd247447368495f0b79be12d1\n\n### Linux 3.4 (May 2012)\n\n* The YAMA security module, and thus the `/proc/sys/kernel/yama/ptrace_scope` file. 
This includes the `PR_SET_PTRACER` and the `PR_SET_PTRACER_ANY` flags for `prctl`. See `ptrace(2)`.\n\n  https://github.com/torvalds/linux/commit/2d514487faf188938a4ee4fb3464eeecfbdcf8eb\n  https://github.com/torvalds/linux/commit/bf06189e4d14641c0148bea16e9dd24943862215\n\n### Linux 3.8 (February 2013)\n\n* The `/proc/[pid]/ns/[ns]` files. Although that directory, and the `net` file therein, were already present in 3.0, the `pid` and `user` ones only arrived in 3.8 and, more importantly, the ability to identify a namespace by the inode number of those files came in 3.8 (when they stopped being hardlinks and became symlinks). See `proc(5)` and `namespaces(7)` and others.\n\n  https://github.com/torvalds/linux/commit/6b4e306aa3dc94a0545eb9279475b1ab6209a31f\n  https://github.com/torvalds/linux/commit/13b6f57623bc485e116344fe91fbcb29f149242b\n  https://github.com/torvalds/linux/commit/57e8391d327609cbf12d843259c968b9e5c1838f\n  https://github.com/torvalds/linux/commit/cde1975bc242f3e1072bde623ef378e547b73f91\n  https://github.com/torvalds/linux/commit/bf056bfa80596a5d14b26b17276a56a0dcb080e5\n  https://github.com/torvalds/linux/commit/98f842e675f96ffac96e6c50315790912b2812be\n\n### Linux 3.11 (September 2013)\n\n* The `O_TMPFILE` flag for `open`. See `open(2)`.\n\n  https://github.com/torvalds/linux/commit/60545d0d4610b02e55f65d141c95b18ccf855b6e\n\n### Linux 3.17 (October 2014)\n\n* The `memfd_create` syscall. 
See `memfd_create(2)`.\n\n  https://github.com/torvalds/linux/commit/9183df25fe7b194563db3fec6dc3202a5855839c\n\n### Linux 4.11 (April 2017)\n\n* The `/sys/kernel/security/lsm` file in `securityfs` (a list of active Linux Security Modules).\n\n  https://github.com/torvalds/linux/commit/d69dece5f5b6bc7a5e39d2b6136ddc69469331fe\n\n### TODO\n\n* All that sysfs PCIe stuff done by CUDA GDR (e.g., resolving GPUs and NICs to PCIe paths, getting the BAR1 size, ...), plus checking the nv_mem_peer module\n\n## Glibc features required by TensorPipe\n\n### Glibc 2.2.5 (January 2002)\n\n* The `capget` function.\n\n### Glibc 2.3.3 (December 2003)\n\n* The `dlinfo` function. (All of `dlopen`, `dlclose`, `dlsym` and `dlerror` were present since at least glibc 2.0).\n\n### Glibc 2.12 (May 2010)\n\n* The `pthread_setname_np` function.\n"
  },
  {
    "path": "docs/shm.md",
    "content": "# The shm transport\n\nThis document is an attempt to capture the design principles and inner\nworking of the shm transport (see `tensorpipe/transport/shm`). Its\nperformance makes it an efficient alternative to IP based transports\nfor same-machine communication.\n\nAt the core of a transport implementation lies a listener, a\nconnection, and a context. Listeners accept connections. Contexts\ncreate listeners and can connect to remote listeners.\n\n## Concepts\n\n\n### Ring buffers\n\nShared memory ring buffers are a core building block for the shm\ntransport. They are implemented with split control and data\nsections. This means the data section can be fully aligned. The header\nsection stores a read/write transaction flag and the head and tail\noffsets into the data section. Producers and consumers of the ring\nbuffer use atomic instructions to mutate this header depending on\ntheir intent.\n\n### File descriptors\n\nThe header and data segments of a shared memory ring buffer are\ncreated as follows. First, a file is created in `/dev/shm` with the\n`O_TMPFILE` flag. This means that anything written to the resulting\nfile is lost when the last file descriptor is closed, unless the file\nis given a name. Because we never give this file a name, the segment\nis automatically cleaned up when the last process that has its file\ndescriptor terminates.\n\nPer above, creating a shared memory ring buffer yields 2 file\ndescriptors, one for the header segment and one for the data segment.\nThese file descriptors are shared over a Unix domain socket.\n\n### The reactor\n\nThis is a TensorPipe specific component. It uses a shared memory ring\nbuffer to allow other processes to trigger functions. If a process wants\nanother process to trigger a function, it registers this function with\nthe reactor, and gets back a 32-bit token. Then, the file descriptors of\nthe reactor's ring buffer, as well as the token, are sent to another\nprocess. 
The other process can now map the reactor ring buffer, and\ntrigger the registered function by writing the token to the ring buffer.\n\nSee [considerations](#considerations) below on why this was used.\n\n### Unix domain sockets\n\nCoordination between processes to bootstrap a connection that uses\nshared memory ring buffers is implemented using Unix domain sockets.\nThe listening side of a connection binds and listens on an abstract\nsocket address. A typical Unix domain socket \"address\" is a filesystem\npathname. An abstract socket address, by contrast, is not visible on\nany filesystem. They exist in a single abstract socket namespace\nshared by all processes on the machine. Removing the filesystem\ndependency means two things:\n\n1. (+) It is not necessary to purge stale Unix domain socket files.\n2. (-) These sockets don't have permissions, so any process that has\n   its name can connect.\n\nRead more about abstract domain sockets [here][1] and [here][2].\n\n[1]: http://man7.org/linux/man-pages/man7/unix.7.html\n[2]: https://utcc.utoronto.ca/~cks/space/blog/linux/SocketAbstractNamespace\n\nOnce processes have established a Unix domain socket, it is used to:\n\n1. Pass the shared memory file descriptors to a peer process.\n2. Signal peer termination (through eof on socket closure).\n3. ... nothing else. All data moves through the ring buffers.\n\n**Note:** abstract socket addresses are a Linux specific feature.\n\n## Bringing it together\n\nSo, to establish one of these shared memory connections, we first\nlisten on some unique abstract socket address. This address must be\nknown to the process that wishes to connect. For a quick test we can\nuse a pre-shared address. Otherwise, we can generate a UUID and share\nit with some out of band mechanism. The connecting process connects\nand the listening process accepts. 
We have now established a Unix\ndomain socket and move on to the next step.\n\nEach process creates a new shared memory ring buffer specifically for\nthis connection. We refer to this ring buffer as the _inbox_. We\nexpect each process to be pinned to a specific NUMA node and perform\nthe memory allocation in the same NUMA domain.\n\nThe file descriptors of the inbox, the file descriptors of the\nreactor, and a token to trigger readability of the inbox, are shared\nover the socket.\n\nEach process receives file descriptors from their peer and initializes\nthe corresponding ring buffers. The peer's inbox is referred to as the\n_outbox_. The token to trigger remote readability is referred to as\nthe _outbox trigger_.\n\nThe connection is now established! Writes are performed by writing\ndirectly into the outbox and triggering the outbox trigger. The\ntrigger wakes up the peer's reactor and executes a function that\nnotifies the connection of readability. Subsequently, the connection\nchecks if there was a pending read operation, and processes it if so.\n\nWhen either process destructs the connection, or crashes, the original\nUnix domain socket is closed, which signals the peer process that it\nshouldn't expect more writes to its inbox and can destruct the\nconnection as well.\n\n## Considerations\n\nA single process may have multiple connections. Therefore, it may have\nmultiple inbox ring buffers. One way to react to incoming writes is to\nsimply check if there are any bytes to read. This requires checking all\nN inboxes for reads, which can become problematic if N gets large. To\nbetter solve this multiplexing problem we initially used an\n[`eventfd(2)`][eventfd] per inbox. This file descriptor was registered\nwith the existing [`epoll(7)`][epoll] loop and would trigger the\nreadability function when it became readable. 
To perform a write, the\npeer process would first write to the outbox and then write to the\npeer's eventfd.\n\n[eventfd]: http://man7.org/linux/man-pages/man2/eventfd.2.html\n[epoll]: http://man7.org/linux/man-pages/man7/epoll.7.html\n\nA simple ping/pong performance benchmark using this approach, with both\nprocesses pinned to the same NUMA node, showed a lower bound latency of\n~12 microseconds. This seemed high for a pair of ring buffer writes, so\nwe explored alternatives, and came up with the reactor approach. Now,\nthe same benchmark runs with a lower bound latency of about ~1.7\nmicroseconds, which is a 7x improvement over the `eventfd(2)`/`epoll(7)`\napproach.\n"
  },
  {
    "path": "docs/thread_model.md",
    "content": "# TensorPipe's thread model\n\nTensorPipe is spawning multiple threads internally. This is a design\nrequirement as, for example, a single thread wouldn't manage to drive a\nmodern network interface card (NIC) at capacity and saturate its\nbandwidth, even if it did nothing by write on the socket: multiple\nthreads writing in parallel to multiple sockets are the only way to\nachieve that.\n\nMoreover, the possibility of spawning new threads when needed allows\nfor a simpler architecture in the implementation of TensorPipe's\nmodular approach to backends (transports and channels): if one of these\nbackends needs to perform some heavy operation (a blocking syscall, an\nevent loop, ...) it can launch a dedicated thread for it rather than\nhaving to schedule it on the user thread or on a shared thread pool,\nthus having to \"fit\" the operation into some framework.\n\nThis heavy reliance on multi-threading poses of course challenges in\ncoordination and robustness. This document aims to outline the patterns\nwe've ended up adopting to have a structured and principled design\naround this.\n\n## Callbacks\n\nTensorPipe uses callbacks to organize the control flow around\nasynchronous and deferred execution. While this may be an anti-pattern\nleading to so-called \"spaghetti code\" or \"callback hell\", we realized\nthat it was the only approach that would yield the performance we need.\nModern alternatives to callbacks (promises/futures, coroutines, ...) \nwould have introduced an unacceptable overhead in some cases.\n\nNearly all operations in TensorPipe are non-blocking and are performed\nasynchronously, in background, with their results notified through\ncallbacks. This includes the creation of pipes and connections (the\nobjects may still be performing initialization when they are given to\nthe user and, although operations can be performed on them, these will\nbe delayed until setup completes). 
And it also includes destruction,\nwhich means that internal resources may not be immediately freed when a\nuser-facing object is deleted. The only synchronization point that\nallows the user to wait for such cleanup to finish is the context's\n`join` method. Some other methods that may occasionally wait are the\nones that return a value, for example the ones to retrieve addresses.\n\n## Shared pointers\n\nAs soon as threads and callbacks enter the mix, race conditions start\nto pop up. Among the first ones, there's the problem of ownership:\nideally we want a `unique_ptr`-style semantics, where each object has a\nclear owner who controls its lifetime. However, when this owner asks\nanother thread to perform an operation on that object as part of a\ncallback, that callback also (temporarily) needs access to the object.\nAs there may be multiple operations with multiple callbacks at the same\ntime, transferring ownership isn't an option, and sharing it is the\nonly way to go. This however requires synchronization among the various\nusers: if the \"real\" user had a `unique_ptr` and gave raw pointers to\nthe callbacks, the real user may delete the object without the\ncallbacks noticing or having any way to stop/delay it. This would then\ncause use-after-free errors. There must thus be a sort of \"lock\" that\nprevents the object from being deleted while someone is working on it,\nlike a \"semaphore\" counting the users. It turns out a perfect tool for\nthe job is `shared_ptr`. Acquiring a lock on the object corresponds to\nobtaining a `shared_ptr` instance, which increases the reference count.\nThe object will only be deleted when its refcount reaches zero, which\nmeans all its users (the \"real\" ones and the callbacks) have stopped\nusing the object.\n\nWe have however solved a problem by creating an opposite one: a memory\nleak. 
Imagine an object (say, a pipe) that is the \"real\" owner of\nanother one (say, a channel) from which it is expecting a callback, and\nthat callback captures a `shared_ptr` to the first object in its\nclosure. This is a reference cycle. It means that even if the \"real\"\nowner of the first object relinquishes its `shared_ptr`, the objects\nwon't be destroyed until the callback fires (if ever). An easy solution\nto this is to have callbacks only keep a `shared_ptr` when they are\nrunning, not while they are waiting. Again, the standard library has\nthe perfect tool for the job: the `weak_ptr`, which will keep the\nrefcount unchanged but can be \"locked\" to obtain a real `shared_ptr`\nwhen needed (curious coincidence that the terminology aligns with ours).\n\nSo, in short: the real owner of an object keeps a `shared_ptr` to it,\nit passes `weak_ptr`s to be stored in callbacks, and these are locked\nback to `shared_ptr`s just before running the callbacks. (If locking\nfails, the callback isn't run).\n\n## Public objects vs private implementations\n\nIt turns out that what we said above isn't always true: in some cases\nwe may want a callback to keep the object alive until it has fired.\nThis happens because some callbacks are one half of a \"contract\"\nregarding data ownership: throughout the API (at higher and lower\nlevels), `read`, `write`, `send` and `recv` methods take some data\n(source or destination buffers), and by doing so the caller hands over\ncontrol of the data to the object. The way for the object to yield\nownership back to the caller is by invoking the callback. We must thus\nensure that these callbacks are always called. However, we must also\navoid calling them when we're not ready yet to give up access to the\ndata. For a more concrete example, consider the user trying to destroy\na pipe that has a pending write operation, while some other thread is\nsimultaneously performing a memory copy as part of that write\noperation. 
If we invoke the write operation's callback before aborting\nthe memory copy we're giving the user the right to deallocate the\nbuffer, which may lead the other thread to segfault.\n\nHere is what needs to happen: when a user deletes a pipe, all its\npending operations must be interrupted, which in turn also aborts the\nlower level operations; the pipe's callbacks, however, must not be\nfired and instead kept alive while waiting for the lower level\noperations to wrap up, and only then they can be triggered. This shows\nthat a subset of the pipe, containing at least the callbacks, must\nsurvive the destruction of the whole pipe. In other words, the lifetime\nof the inner part must be detachable from the one of the outer shell.\n\nIn order to do so, most public objects are just thin wrappers around a\nsingle member field, which is just a pointer to an instance of a\nprivate \"implementation\" (abbreviated as impl), which is where\neverything happens. The impl is a `shared_ptr` so that its life cycle\ncan be detached and extended with respect to the one of the public\nobject. The callbacks that we must wait for in order to regain control\nof some resource also capture a `shared_ptr`. This way we can still get\nthe \"signal\" from when the public object is deleted (and can start\nterminating pending operations) but we're also able to keep the impl\naround while waiting for the shutdown to complete.\n\n## Locking\n\nObjects can be accessed and worked on from many threads, from all\ndirections, above (user threads, higher up the stack) and below \n(low-level backend threads). To avoid race conditions on the internal\nstate of these objects, we must have mutual exclusion between threads,\nusing locks. 
While it may be possible to have separate fine-grained\nlocks for different parts of some objects, in general it is safer\nand easier to have one mutex per object, and use it to lock all\noperations.\n\nThat's easily said, but it just as easily leads to deadlocks, which in\nour experience come in two flavors:\n\n- When an object (holding its own lock) calls an \"upward\" callback which\n  (inline/serially) tries to perform an operation on that same object,\n  which tries to acquire the same lock. This is a perfectly legitimate\n  behavior, since all of our callbacks are \"one-shot\", that is, they\n  \"burn out\" after they fire and thus must be immediately rearmed.\n\n- When an object (holding its own lock) performs an operation on a\n  lower level object, passing a callback to it, and this callback is\n  called immediately (inline/serially) and tries to also acquire the\n  lock of the first object. This typically happens when the lower level\n  object is in an error state and can thus \"shortcut\" the operation and\n  immediately trigger the callback instead of deferring it to a thread.\n\nMitigations for these problems are possible but none is universal and\nthey all have drawbacks. Examples are:\n\n- When calling upward callbacks, extract one from the object onto the\n  stack, put the object in a consistent state, release its lock and\n  then call the callback. This works but there's a racing risk which\n  would cause callbacks to not be called in their intended order.\n\n- Have a dedicated thread from which to invoke callbacks. Therefore\n  other threads, instead of triggering callbacks, push them to some\n  queue that is consumed by this thread. This resembles the semi-future\n  and executor pattern. We used to have such a pattern in place for\n  calling the pipe callbacks but it was introducing an unacceptable\n  latency overhead.\n\n- The backends already typically have a thread they can defer callbacks\n  to, and for the most part they already do. 
However having such a\n  thread isn't necessarily a requirement for a transport, and such\n  threads may not be running at all times (e.g., once a backend has\n  been joined).\n\n- We could replace regular locks with reentrant locks (also called\n  recursive). This is typically considered bad practice, though, and\n  when at some point we tried this we indeed hit problems.\n\nThe next section presents a more disciplined way of dealing with races.\n\n## Event loops\n\nA classic way of dealing with parallel I/O is event loops: repeatedly\npolling a set of file descriptors for readability/writability (blocking\nto wait for them to become ready), dealing with them, and repeating.\nSyscalls to do this are `select`, `epoll`, and more. The `libuv`\nlibrary used by one of TensorPipe's transports is also based on an\nevent loop. Event loops are typically single-threaded, and they allow\none to \"simulate\" parallelism by multiplexing threads if those threads\nwould spend most of their time doing blocking I/O.\n\nThe simplicity of event loops, their single-threaded safety and their\nestablished effectiveness prompted us to make them a foundation of our\nthreading model.\n\nIf an object already has a thread to which it offloads some operations\n(this is the case for most transports and some channels, but not the\npipe) then we defer all operations to it. And we really mean all of\nthem: all manipulation of the object (scheduling operations, querying\ninformation, running callbacks) must be done from within that event\nloop thread. All operations that are attempted on the object, either\nfrom another thread or from within the event loop thread (for example,\nby a callback in user code) are deferred, appended to a queue, and\ndealt with at a later iteration of the loop. This guarantees that we'll\nalways have a single thread accessing such objects, thus ensuring\nthread safety without even using any locks. 
Note that such a design isn't\na requirement for transports, it's just the pattern that we've adopted\nfor all our current transports.\n\nIf, on the other hand, an object does not have access to a thread to\nuse as an event loop, we'll \"borrow\" the caller's thread and\ntemporarily use it as an event loop. We'll similarly have a queue of\ntasks, and the thread will consume them one by one, until none are\nleft, at which point we'll stop occupying the thread and release it\nback to the caller. If any new operation is attempted by another thread\nwhile one of these temporary event loops is running, that operation is\nadded to the queue and thus deferred to the already-running event loop,\nwith the new thread immediately able to return to what it was doing.\n"
  },
  {
    "path": "setup.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\nimport os\nimport subprocess\nimport sys\nfrom pathlib import Path\n\nfrom setuptools import Extension, setup\nfrom setuptools.command.build_ext import build_ext\n\n\nclass CMakeBuild(build_ext):\n    def run(self):\n        for ext in self.extensions:\n            self.build_extension(ext)\n\n    def build_extension(self, ext):\n        if not os.path.exists(self.build_temp):\n            os.makedirs(self.build_temp)\n\n        source_path = Path(__file__).parent.resolve()\n        output_path = Path(self.get_ext_fullpath(ext.name)).parent.resolve()\n        build_type = \"Debug\" if self.debug else \"Release\"\n\n        cmake_cmd = [\n            \"cmake\",\n            f\"{source_path}\",\n            f\"-DCMAKE_LIBRARY_OUTPUT_DIRECTORY={output_path}\",\n            f\"-DPYTHON_EXECUTABLE={sys.executable}\",\n            f\"-DCMAKE_BUILD_TYPE={build_type}\",\n            \"-DCMAKE_C_COMPILER=clang-6.0\",\n            \"-DCMAKE_CXX_COMPILER=clang++-6.0\",\n            \"-DCMAKE_POSITION_INDEPENDENT_CODE=true\",\n            \"-DTP_BUILD_PYTHON=true\",\n        ]\n\n        for opt in os.environ:\n            if opt.startswith(\"TP_\"):\n                cmake_cmd.append(f\"-D{opt}={os.environ[opt]}\")\n\n        make_cmd = [\"make\", \"-j\", \"pytensorpipe\"]\n\n        subprocess.check_call(cmake_cmd, cwd=self.build_temp)\n        subprocess.check_call(make_cmd, cwd=self.build_temp)\n\n\nsetup(\n    name=\"tensorpipe\",\n    version=\"0.0.0\",\n    author=\"Facebook AI Research\",\n    ext_modules=[Extension(\"pytensorpipe\", sources=[])],\n    cmdclass={\"build_ext\": CMakeBuild},\n    zip_safe=False,\n)\n"
  },
  {
    "path": "tensorpipe/.clang-format",
    "content": "---\nAccessModifierOffset: -1\nAlignAfterOpenBracket: AlwaysBreak\nAlignConsecutiveAssignments: false\nAlignConsecutiveDeclarations: false\nAlignEscapedNewlinesLeft: true\nAlignOperands:   false\nAlignTrailingComments: false\nAllowAllParametersOfDeclarationOnNextLine: false\nAllowShortBlocksOnASingleLine: false\nAllowShortCaseLabelsOnASingleLine: false\nAllowShortFunctionsOnASingleLine: Empty\nAllowShortIfStatementsOnASingleLine: false\nAllowShortLoopsOnASingleLine: false\nAlwaysBreakAfterReturnType: None\nAlwaysBreakBeforeMultilineStrings: true\nAlwaysBreakTemplateDeclarations: true\nBinPackArguments: false\nBinPackParameters: false\nBraceWrapping:\n  AfterClass:      false\n  AfterControlStatement: false\n  AfterEnum:       false\n  AfterFunction:   false\n  AfterNamespace:  false\n  AfterObjCDeclaration: false\n  AfterStruct:     false\n  AfterUnion:      false\n  BeforeCatch:     false\n  BeforeElse:      false\n  IndentBraces:    false\nBreakBeforeBinaryOperators: None\nBreakBeforeBraces: Attach\nBreakBeforeTernaryOperators: true\nBreakConstructorInitializersBeforeComma: false\nBreakAfterJavaFieldAnnotations: false\nBreakStringLiterals: false\nColumnLimit:     80\nCommentPragmas:  '^ IWYU pragma:'\nCompactNamespaces: false\nConstructorInitializerAllOnOneLineOrOnePerLine: true\nConstructorInitializerIndentWidth: 4\nContinuationIndentWidth: 4\nCpp11BracedListStyle: true\nDerivePointerAlignment: false\nDisableFormat:   false\nForEachMacros:   [ FOR_EACH_RANGE, FOR_EACH, ]\nIncludeCategories:\n  - Regex:           '^<.*\\.h(pp)?>'\n    Priority:        1\n  - Regex:           '^<.*'\n    Priority:        2\n  - Regex:           '.*'\n    Priority:        3\nIndentCaseLabels: true\nIndentWidth:     2\nIndentWrappedFunctionNames: false\nKeepEmptyLinesAtTheStartOfBlocks: false\nMacroBlockBegin: ''\nMacroBlockEnd:   ''\nMaxEmptyLinesToKeep: 1\nNamespaceIndentation: None\nObjCBlockIndentWidth: 2\nObjCSpaceAfterProperty: 
false\nObjCSpaceBeforeProtocolList: false\nPenaltyBreakBeforeFirstCallParameter: 1\nPenaltyBreakComment: 300\nPenaltyBreakFirstLessLess: 120\nPenaltyBreakString: 1000\nPenaltyExcessCharacter: 1000000\nPenaltyReturnTypeOnItsOwnLine: 2000000\nPointerAlignment: Left\nReflowComments:  true\nSortIncludes:    true\nSpaceAfterCStyleCast: false\nSpaceBeforeAssignmentOperators: true\nSpaceBeforeParens: ControlStatements\nSpaceInEmptyParentheses: false\nSpacesBeforeTrailingComments: 1\nSpacesInAngles:  false\nSpacesInContainerLiterals: true\nSpacesInCStyleCastParentheses: false\nSpacesInParentheses: false\nSpacesInSquareBrackets: false\nStandard:        Cpp11\nTabWidth:        8\nUseTab:          Never\n...\n"
  },
  {
    "path": "tensorpipe/.clang-tidy",
    "content": "---\nInheritParentConfig: true\nChecks: '\nreadability-identifier-naming,\nreadability-inconsistent-declaration-parameter-name,\nreadability-named-parameter,\n'\nFormatStyle: file\nCheckOptions:\n# Names of classes (and structs?)\n- { key: readability-identifier-naming.ClassCase, value: CamelCase }\n# Names of enums and enum classes\n- { key: readability-identifier-naming.EnumCase, value: CamelCase }\n# Names of members and methods of classes (and structs?)\n- { key: readability-identifier-naming.MemberCase, value: camelBack }\n- { key: readability-identifier-naming.PrivateMemberCase, value: camelBack }\n- { key: readability-identifier-naming.PrivateMemberSuffix, value: '_' }\n- { key: readability-identifier-naming.ProtectedMemberCase, value: camelBack }\n- { key: readability-identifier-naming.ProtectedMemberSuffix, value: '_' }\n- { key: readability-identifier-naming.MethodCase, value: camelBack }\n# Names of parameters and local variables\n- { key: readability-identifier-naming.LocalVariableCase, value: camelBack }\n- { key: readability-identifier-naming.ParameterCase, value: camelBack }\n# Names of constants\n- { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase }\n- { key: readability-identifier-naming.GlobalConstantPrefix, value: 'k' }\n# FIXME scoped enums are only supported in clang-tidy 12.\n# Names of (non-class) enum members\n# - { key: readability-identifier-naming.EnumConstantCase, value: UPPER_CASE }\n# Names of enum class members\n# - { key: readability-identifier-naming.ScopedEnumConstantCase, value: CamelCase }\n# - { key: readability-identifier-naming.ScopedEnumConstantPrefix, value: 'k' }\n# Names of template parameters\n- { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase }\n# Names of global functions\n- { key: readability-identifier-naming.FunctionCase, value: camelBack }\n# Names of namespaces\n- { key: readability-identifier-naming.NamespaceCase, value: lower_case }\n...\n"
  },
  {
    "path": "tensorpipe/CMakeLists.txt",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\n# TP_SRCS is the list of source files that we need to build libtensorpipe.\nset(TP_SRCS)\n\n# TP_PUBLIC_HDRS is the list of public header files that we need to install.\nset(TP_PUBLIC_HDRS)\n\n# TP_LINK_LIBRARIES is list of dependent libraries to be linked\nset(TP_LINK_LIBRARIES)\n\n# TP_INCLUDE_DIRS is list of include path to be used\nset(TP_INCLUDE_DIRS)\n\nlist(APPEND TP_SRCS\n  channel/error.cc\n  channel/helpers.cc\n  common/address.cc\n  common/allocator.cc\n  common/error.cc\n  common/fd.cc\n  common/socket.cc\n  common/system.cc\n  core/context.cc\n  core/context_impl.cc\n  core/error.cc\n  core/listener.cc\n  core/listener_impl.cc\n  core/pipe.cc\n  core/pipe_impl.cc\n  transport/error.cc)\n\nlist(APPEND TP_PUBLIC_HDRS\n  tensorpipe.h\n  channel/context.h\n  channel/error.h\n  common/buffer.h\n  common/cpu_buffer.h\n  common/device.h\n  common/error.h\n  common/optional.h\n  core/context.h\n  core/error.h\n  core/listener.h\n  core/message.h\n  core/pipe.h\n  transport/context.h\n  transport/error.h)\n\nlist(APPEND TP_INCLUDE_DIRS\n  $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>\n  $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}>\n  $<INSTALL_INTERFACE:${TP_INSTALL_INCLUDEDIR}>)\n\n\n## Channels\n\n### basic\n\nlist(APPEND TP_SRCS\n  channel/basic/channel_impl.cc\n  channel/basic/context_impl.cc\n  channel/basic/factory.cc)\nlist(APPEND TP_PUBLIC_HDRS\n  channel/basic/factory.h)\n\n### xth\n\nlist(APPEND TP_SRCS\n  channel/xth/channel_impl.cc\n  channel/xth/context_impl.cc\n  channel/xth/factory.cc)\nlist(APPEND TP_PUBLIC_HDRS\n  channel/xth/factory.h)\n\n### cma\n\ntp_conditional_backend(\n  TP_ENABLE_CMA \"Enable cross-memory attach channel\" \"LINUX\")\nif(TP_ENABLE_CMA)\n  list(APPEND TP_SRCS\n    channel/cma/channel_impl.cc\n    
channel/cma/context_impl.cc\n    channel/cma/factory.cc)\n  list(APPEND TP_PUBLIC_HDRS\n    channel/cma/factory.h)\n  set(TENSORPIPE_HAS_CMA_CHANNEL 1)\nendif()\n\n### mpt\n\nlist(APPEND TP_SRCS\n  channel/mpt/channel_impl.cc\n  channel/mpt/context_impl.cc\n  channel/mpt/factory.cc)\nlist(APPEND TP_PUBLIC_HDRS\n  channel/mpt/factory.h)\n\n## Transports\n\n### uv\n\nlist(APPEND TP_SRCS\n  transport/uv/connection_impl.cc\n  transport/uv/context_impl.cc\n  transport/uv/error.cc\n  transport/uv/factory.cc\n  transport/uv/listener_impl.cc\n  transport/uv/loop.cc\n  transport/uv/sockaddr.cc\n  transport/uv/utility.cc)\nlist(APPEND TP_PUBLIC_HDRS\n  transport/uv/error.h\n  transport/uv/factory.h\n  transport/uv/utility.h)\n\n# Add uv package\nfind_package(uv REQUIRED)\nlist(APPEND TP_LINK_LIBRARIES uv::uv)\n\n### shm\n\ntp_conditional_backend(\n  TP_ENABLE_SHM \"Enable shared-memory transport\" \"LINUX\")\nif(TP_ENABLE_SHM)\n  list(APPEND TP_SRCS\n    common/epoll_loop.cc\n    common/shm_segment.cc\n    transport/shm/connection_impl.cc\n    transport/shm/context_impl.cc\n    transport/shm/factory.cc\n    transport/shm/listener_impl.cc\n    transport/shm/reactor.cc\n    transport/shm/sockaddr.cc)\n  list(APPEND TP_PUBLIC_HDRS\n    transport/shm/factory.h)\n  set(TENSORPIPE_HAS_SHM_TRANSPORT 1)\nendif()\n\n### ibv\n\ntp_conditional_backend(\n  TP_ENABLE_IBV \"Enable InfiniBand transport\" \"LINUX\")\nif(TP_ENABLE_IBV)\n  list(APPEND TP_SRCS\n    common/epoll_loop.cc\n    common/ibv.cc\n    transport/ibv/connection_impl.cc\n    transport/ibv/context_impl.cc\n    transport/ibv/error.cc\n    transport/ibv/factory.cc\n    transport/ibv/listener_impl.cc\n    transport/ibv/reactor.cc\n    transport/ibv/sockaddr.cc\n    transport/ibv/utility.cc)\n  list(APPEND TP_PUBLIC_HDRS\n    transport/ibv/error.h\n    transport/ibv/factory.h\n    transport/ibv/utility.h)\n  set(TENSORPIPE_HAS_IBV_TRANSPORT 1)\nendif()\n\n\n## MAC OS specific library deps\n\nif(APPLE)\n  find_library(CF 
CoreFoundation)\n  find_library(IOKIT IOKit)\n  list(APPEND TP_LINK_LIBRARIES ${CF} ${IOKIT})\nendif()\n\n\n## Config\n\nconfigure_file(config.h.in config.h)\n\n\n## Libnop\n\n# We should keep libnop headers private as they should not be exposed to downstream users,\n# but they're currently transitively included by tensorpipe/transport/connection.h (which\n# is still unclear whether it should be a public or private header).\nlist(APPEND TP_INCLUDE_DIRS $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/third_party/libnop/include>)\n\n\n## Target\n\n# Add the tensorpipe library target\nadd_library(tensorpipe ${TP_STATIC_OR_SHARED} ${TP_SRCS})\n\n# Set target properties\nif(BUILD_SHARED_LIBS)\n  set_target_properties(tensorpipe PROPERTIES POSITION_INDEPENDENT_CODE 1)\nendif()\n\n# Add all the link libraries and include directories to the tensorpipe target and keeping the link PUBLIC\ntarget_link_libraries(tensorpipe PRIVATE ${TP_LINK_LIBRARIES})\ntarget_include_directories(tensorpipe PUBLIC ${TP_INCLUDE_DIRS})\n\n\n## Install\n\ninstall(TARGETS tensorpipe\n        EXPORT TensorpipeTargets\n        LIBRARY DESTINATION ${TP_INSTALL_LIBDIR}\n        ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR})\n\nforeach(_header_file ${TP_PUBLIC_HDRS})\n  get_filename_component(_TP_HEADER_SUBDIR \"${_header_file}\" DIRECTORY)\n  install(FILES ${_header_file}\n          DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe/${_TP_HEADER_SUBDIR})\nendforeach()\n\ninstall(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h\n        DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe)\n\n\n## CUDA\n\nif(TP_USE_CUDA)\n  # TP_SRCS is the list of source files that we need to build libtensorpipe.\n  set(TP_CUDA_SRCS)\n\n  # TP_PUBLIC_HDRS is the list of public header files that we need to install.\n  set(TP_CUDA_PUBLIC_HDRS)\n\n  # TP_LINK_LIBRARIES is list of dependent libraries to be linked\n  set(TP_CUDA_LINK_LIBRARIES)\n\n  # TP_INCLUDE_DIRS is list of include path to be used\n  set(TP_CUDA_INCLUDE_DIRS)\n\n  
find_package(CUDA REQUIRED)\n  list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES})\n  list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})\n\n  list(APPEND TP_CUDA_SRCS\n    common/cuda_buffer.cc)\n  list(APPEND TP_CUDA_PUBLIC_HDRS\n    tensorpipe_cuda.h\n    common/cuda_buffer.h)\n\n  ### cuda_xth\n\n  list(APPEND TP_CUDA_SRCS\n    channel/cuda_xth/channel_impl.cc\n    channel/cuda_xth/context_impl.cc\n    channel/cuda_xth/factory.cc)\n  list(APPEND TP_CUDA_PUBLIC_HDRS\n    channel/cuda_xth/factory.h)\n\n  ### cuda_basic\n\n  list(APPEND TP_CUDA_SRCS\n    channel/cuda_basic/channel_impl.cc\n    channel/cuda_basic/context_impl.cc\n    channel/cuda_basic/factory.cc\n    common/cuda_loop.cc)\n  list(APPEND TP_CUDA_PUBLIC_HDRS\n    channel/cuda_basic/factory.h)\n\n  ### cuda_ipc\n\n  tp_conditional_backend(\n    TP_ENABLE_CUDA_IPC \"Enable CUDA inter-process communication channel\" \"TP_USE_CUDA\")\n  if(TP_ENABLE_CUDA_IPC)\n    list(APPEND TP_CUDA_SRCS\n      channel/cuda_ipc/channel_impl.cc\n      channel/cuda_ipc/context_impl.cc\n      channel/cuda_ipc/factory.cc)\n    list(APPEND TP_CUDA_PUBLIC_HDRS\n      channel/cuda_ipc/factory.h)\n    set(TENSORPIPE_HAS_CUDA_IPC_CHANNEL 1)\n  endif()\n\n  ### cuda_gdr\n\n  tp_conditional_backend(\n    TP_ENABLE_CUDA_GDR \"Enable CUDA GpuDirect (InfiniBand) channel\" \"LINUX\")\n  if(TP_ENABLE_CUDA_GDR)\n    list(APPEND TP_CUDA_SRCS\n      common/ibv.cc\n      channel/cuda_gdr/channel_impl.cc\n      channel/cuda_gdr/context_impl.cc\n      channel/cuda_gdr/factory.cc)\n    list(APPEND TP_CUDA_PUBLIC_HDRS\n      channel/cuda_gdr/error.h\n      channel/cuda_gdr/factory.h)\n    set(TENSORPIPE_HAS_CUDA_GDR_CHANNEL 1)\n  endif()\n\n  configure_file(config_cuda.h.in config_cuda.h)\n\n  add_library(tensorpipe_cuda ${TP_STATIC_OR_SHARED} ${TP_CUDA_SRCS})\n\n  if(BUILD_SHARED_LIBS)\n    set_target_properties(tensorpipe_cuda PROPERTIES POSITION_INDEPENDENT_CODE 1)\n  endif()\n\n  target_link_libraries(tensorpipe_cuda PUBLIC 
tensorpipe)\n  target_link_libraries(tensorpipe_cuda PRIVATE ${TP_CUDA_LINK_LIBRARIES})\n  target_include_directories(tensorpipe_cuda PUBLIC ${TP_CUDA_INCLUDE_DIRS})\n\n  install(TARGETS tensorpipe_cuda\n          EXPORT TensorpipeTargets\n          LIBRARY DESTINATION ${TP_INSTALL_LIBDIR}\n          ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR})\n\n  foreach(_header_file ${TP_CUDA_PUBLIC_HDRS})\n    get_filename_component(_TP_HEADER_SUBDIR \"${_header_file}\" DIRECTORY)\n    install(FILES ${_header_file}\n            DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe/${_TP_HEADER_SUBDIR})\n  endforeach()\n\n  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config_cuda.h\n          DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe)\n\nendif()\n\n\n## Python bindings\n\nif(TP_BUILD_PYTHON)\n  add_subdirectory(python)\nendif()\n\n\n## Benchmarks\n\nif (TP_BUILD_BENCHMARK)\n  add_subdirectory(benchmark)\nendif()\n\n\n## Misc tools\n\nif (TP_BUILD_MISC)\n  add_subdirectory(misc)\nendif()\n\n\n## Tests\n\nif(TP_BUILD_TESTING)\n  add_subdirectory(test)\nendif()\n"
  },
  {
    "path": "tensorpipe/benchmark/CMakeLists.txt",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\n# TODO: Make those separate CMake projects.\n\nadd_executable(benchmark_transport benchmark_transport.cc options.cc transport_registry.cc)\ntarget_link_libraries(benchmark_transport PRIVATE tensorpipe)\n\nadd_executable(benchmark_pipe benchmark_pipe.cc options.cc transport_registry.cc channel_registry.cc)\ntarget_link_libraries(benchmark_pipe PRIVATE tensorpipe tensorpipe_cuda)\n"
  },
  {
    "path": "tensorpipe/benchmark/benchmark_pipe.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <cstring>\n\n#include <future>\n\n#include <tensorpipe/benchmark/channel_registry.h>\n#include <tensorpipe/benchmark/measurements.h>\n#include <tensorpipe/benchmark/options.h>\n#include <tensorpipe/benchmark/transport_registry.h>\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/core/context.h>\n#include <tensorpipe/core/listener.h>\n#include <tensorpipe/core/pipe.h>\n\n// We might sometimes want to run this benchmark using NCCL instead of\n// TensorPipe. We don't want to include NCCL as a submodule and deal with the\n// build issues. So we've prepared the code and left it around, but disabled it.\n#if USE_NCCL\n#include <nccl.h>\n\n#define TP_NCCL_CHECK(op)                   \\\n  {                                         \\\n    ncclResult_t res = (op);                \\\n    TP_THROW_ASSERT_IF(res != ncclSuccess); \\\n  }\n\nstruct NcclCommDeleter {\n  void operator()(ncclComm_t comm) {\n    TP_NCCL_CHECK(ncclCommDestroy(comm));\n  }\n};\n\nusing NcclComm =\n    std::unique_ptr<std::remove_pointer_t<ncclComm_t>, NcclCommDeleter>;\n\nstatic NcclComm createNcclComm(int rank, int worldSize, ncclUniqueId uniqueId) {\n  ncclComm_t comm;\n  TP_NCCL_CHECK(ncclCommInitRank(&comm, worldSize, uniqueId, rank));\n  return NcclComm(comm, NcclCommDeleter{});\n}\n#endif // USE_NCCL\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::benchmark;\n\nstatic constexpr int kNumWarmUpRounds = 5;\n\nusing Payload = std::unique_ptr<uint8_t[]>;\nusing CpuTensor = std::unique_ptr<uint8_t[]>;\n\nstruct CudaMemoryDeleter {\n  void operator()(void* ptr) {\n    TP_CUDA_CHECK(cudaFree(ptr));\n  
}\n};\n\nstruct CudaStreamDeleter {\n  void operator()(cudaStream_t stream) {\n    TP_CUDA_CHECK(cudaStreamDestroy(stream));\n  }\n};\n\nusing CudaTensor = std::unique_ptr<uint8_t[], CudaMemoryDeleter>;\nusing CudaStream =\n    std::unique_ptr<std::remove_pointer_t<cudaStream_t>, CudaStreamDeleter>;\n\nstruct Data {\n  size_t numPayloads;\n  size_t payloadSize;\n  std::vector<Payload> expectedPayload;\n  std::vector<std::string> expectedPayloadMetadata;\n  std::vector<Payload> temporaryPayload;\n\n  size_t numTensors;\n  size_t tensorSize;\n  TensorType tensorType;\n  std::vector<CpuTensor> expectedCpuTensor;\n  std::vector<CudaTensor> expectedCudaTensor;\n  std::vector<std::string> expectedTensorMetadata;\n  std::vector<CpuTensor> temporaryCpuTensor;\n  std::vector<CudaTensor> temporaryCudaTensor;\n  CudaStream cudaStream;\n  size_t cudaSyncPeriod;\n\n  std::string expectedMetadata;\n\n#if USE_NCCL\n  NcclComm ncclComm;\n#endif // USE_NCCL\n};\n\nstruct MultiDeviceMeasurements {\n  // The CPU time to do each ping-pong.\n  Measurements cpu;\n  // The CPU time of N iterations, including a final CUDA stream sync.\n  Measurements cuda;\n};\n\nstatic void printMeasurements(Measurements& measurements, size_t dataLen) {\n  measurements.sort();\n  fprintf(\n      stderr,\n      \"%-15s %-15s %-12s %-7s %-7s %-7s %-7s\\n\",\n      \"chunk-size\",\n      \"# ping-pong\",\n      \"avg (usec)\",\n      \"p50\",\n      \"p75\",\n      \"p90\",\n      \"p95\");\n  fprintf(\n      stderr,\n      \"%-15lu %-15lu %-12.3f %-7.3f %-7.3f %-7.3f %-7.3f\\n\",\n      dataLen,\n      measurements.size(),\n      measurements.sum().count() / (float)measurements.size() / 1000.0,\n      measurements.percentile(0.50).count() / 1000.0,\n      measurements.percentile(0.75).count() / 1000.0,\n      measurements.percentile(0.90).count() / 1000.0,\n      measurements.percentile(0.95).count() / 1000.0);\n}\n\nstatic void printMultiDeviceMeasurements(\n    MultiDeviceMeasurements& measurements,\n    
size_t dataLen) {\n  printMeasurements(measurements.cpu, dataLen);\n  printMeasurements(measurements.cuda, dataLen);\n}\n\nstatic std::unique_ptr<uint8_t[]> createEmptyCpuData(size_t size) {\n  return std::make_unique<uint8_t[]>(size);\n}\n\nstatic std::unique_ptr<uint8_t[]> createFullCpuData(size_t size) {\n  std::unique_ptr<uint8_t[]> data = createEmptyCpuData(size);\n  // Generate fixed data for validation between peers\n  for (size_t i = 0; i < size; i++) {\n    data[i] = (i >> 8) ^ (i & 0xff);\n  }\n  return data;\n}\n\nstatic CudaTensor createEmptyCudaData(size_t size) {\n  uint8_t* ptr;\n  TP_CUDA_CHECK(cudaMalloc(&ptr, size));\n  return CudaTensor(ptr);\n}\n\nstatic CudaTensor createFullCudaData(size_t size) {\n  uint8_t* ptr;\n  TP_CUDA_CHECK(cudaMalloc(&ptr, size));\n  CpuTensor data = createFullCpuData(size);\n  TP_CUDA_CHECK(cudaMemcpy(ptr, data.get(), size, cudaMemcpyHostToDevice));\n  return CudaTensor(ptr);\n}\n\nstatic CudaStream createCudaStream() {\n  cudaStream_t stream;\n  TP_CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));\n  return CudaStream(stream);\n}\n\nstatic void serverPongPingNonBlock(\n    std::shared_ptr<Pipe> pipe,\n    int& numWarmUps,\n    int& numRoundTrips,\n    std::promise<void>& doneProm,\n    Data& data,\n    Measurements& measurements) {\n#if USE_NCCL\n  for (int iterIdx = 0; iterIdx < numWarmUps + numRoundTrips; iterIdx++) {\n    // TODO Handle multiple tensors.\n    TP_NCCL_CHECK(ncclRecv(\n        data.temporaryCudaTensor[0].get(),\n        data.tensorSize,\n        ncclInt8,\n        1,\n        data.ncclComm.get(),\n        data.cudaStream.get()));\n    TP_NCCL_CHECK(ncclSend(\n        data.temporaryCudaTensor[0].get(),\n        data.tensorSize,\n        ncclInt8,\n        1,\n        data.ncclComm.get(),\n        data.cudaStream.get()));\n  }\n  doneProm.set_value();\n  return;\n#endif // USE_NCCL\n  pipe->readDescriptor(\n      [pipe, &numWarmUps, &numRoundTrips, &doneProm, &data, 
&measurements](\n          const Error& error, Descriptor descriptor) {\n        TP_THROW_ASSERT_IF(error) << error.what();\n        Allocation allocation;\n        TP_DCHECK_EQ(descriptor.metadata, data.expectedMetadata);\n        if (data.payloadSize > 0) {\n          TP_DCHECK_EQ(descriptor.payloads.size(), data.numPayloads);\n          allocation.payloads.resize(data.numPayloads);\n          for (size_t payloadIdx = 0; payloadIdx < data.numPayloads;\n               payloadIdx++) {\n            TP_DCHECK_EQ(\n                descriptor.payloads[payloadIdx].metadata,\n                data.expectedPayloadMetadata[payloadIdx]);\n            TP_DCHECK_EQ(\n                descriptor.payloads[payloadIdx].length, data.payloadSize);\n            allocation.payloads[payloadIdx].data =\n                data.temporaryPayload[payloadIdx].get();\n          }\n        } else {\n          TP_DCHECK_EQ(descriptor.payloads.size(), 0);\n        }\n        if (data.tensorSize > 0) {\n          TP_DCHECK_EQ(descriptor.tensors.size(), data.numTensors);\n          allocation.tensors.resize(data.numTensors);\n          for (size_t tensorIdx = 0; tensorIdx < data.numTensors; tensorIdx++) {\n            TP_DCHECK_EQ(\n                descriptor.tensors[tensorIdx].metadata,\n                data.expectedTensorMetadata[tensorIdx]);\n            TP_DCHECK_EQ(descriptor.tensors[tensorIdx].length, data.tensorSize);\n            if (data.tensorType == TensorType::kCpu) {\n              allocation.tensors[tensorIdx].buffer = CpuBuffer{\n                  .ptr = data.temporaryCpuTensor[tensorIdx].get(),\n              };\n            } else if (data.tensorType == TensorType::kCuda) {\n              allocation.tensors[tensorIdx].buffer = CudaBuffer{\n                  .ptr = data.temporaryCudaTensor[tensorIdx].get(),\n                  .stream = data.cudaStream.get(),\n              };\n            } else {\n              TP_THROW_ASSERT() << \"Unknown tensor type\";\n            }\n          
}\n        } else {\n          TP_DCHECK_EQ(descriptor.tensors.size(), 0);\n        }\n\n        pipe->read(\n            allocation,\n            [pipe,\n             &numWarmUps,\n             &numRoundTrips,\n             &doneProm,\n             &data,\n             &measurements,\n             descriptor{std::move(descriptor)},\n             allocation](const Error& error) {\n              TP_THROW_ASSERT_IF(error) << error.what();\n\n              Message message;\n              if (data.payloadSize > 0) {\n                TP_DCHECK_EQ(allocation.payloads.size(), data.numPayloads);\n                message.payloads.resize(data.numPayloads);\n                for (size_t payloadIdx = 0; payloadIdx < data.numPayloads;\n                     payloadIdx++) {\n                  TP_DCHECK_EQ(\n                      descriptor.payloads[payloadIdx].length, data.payloadSize);\n                  TP_DCHECK_EQ(\n                      memcmp(\n                          allocation.payloads[payloadIdx].data,\n                          data.expectedPayload[payloadIdx].get(),\n                          descriptor.payloads[payloadIdx].length),\n                      0);\n                  message.payloads[payloadIdx] = {\n                      .data = data.expectedPayload[payloadIdx].get(),\n                      .length = descriptor.payloads[payloadIdx].length,\n                  };\n                }\n              } else {\n                TP_DCHECK_EQ(allocation.payloads.size(), 0);\n              }\n              if (data.tensorSize > 0) {\n                TP_DCHECK_EQ(allocation.tensors.size(), data.numTensors);\n                message.tensors.resize(data.numTensors);\n                for (size_t tensorIdx = 0; tensorIdx < data.numTensors;\n                     tensorIdx++) {\n                  TP_DCHECK_EQ(\n                      descriptor.tensors[tensorIdx].length, data.tensorSize);\n                  if (data.tensorType == TensorType::kCpu) {\n                    
TP_DCHECK_EQ(\n                        memcmp(\n                            allocation.tensors[tensorIdx]\n                                .buffer.unwrap<CpuBuffer>()\n                                .ptr,\n                            data.expectedCpuTensor[tensorIdx].get(),\n                            descriptor.tensors[tensorIdx].length),\n                        0);\n                  } else if (data.tensorType == TensorType::kCuda) {\n                    // No (easy) way to do a memcmp with CUDA, I believe...\n                  } else {\n                    TP_THROW_ASSERT() << \"Unknown tensor type\";\n                  }\n                  message.tensors[tensorIdx] = {\n                      .buffer = allocation.tensors[tensorIdx].buffer,\n                      .length = descriptor.tensors[tensorIdx].length,\n                      .targetDevice =\n                          descriptor.tensors[tensorIdx].sourceDevice,\n                  };\n                }\n              } else {\n                TP_DCHECK_EQ(allocation.tensors.size(), 0);\n              }\n\n              pipe->write(\n                  std::move(message),\n                  [pipe,\n                   &numWarmUps,\n                   &numRoundTrips,\n                   &doneProm,\n                   &data,\n                   &measurements](const Error& error) {\n                    TP_THROW_ASSERT_IF(error) << error.what();\n                    if (numWarmUps > 0) {\n                      numWarmUps -= 1;\n                    } else {\n                      numRoundTrips -= 1;\n                    }\n                    if (numRoundTrips > 0) {\n                      serverPongPingNonBlock(\n                          pipe,\n                          numWarmUps,\n                          numRoundTrips,\n                          doneProm,\n                          data,\n                          measurements);\n                    } else {\n                      doneProm.set_value();\n  
                  }\n                  });\n            });\n      });\n}\n\n// Start with receiving ping\nstatic void runServer(const Options& options) {\n  std::string addr = options.address;\n  int numWarmUps = kNumWarmUpRounds;\n  int numRoundTrips = options.numRoundTrips;\n\n  Data data;\n  data.numPayloads = options.numPayloads;\n  data.payloadSize = options.payloadSize;\n  for (size_t payloadIdx = 0; payloadIdx < options.numPayloads; payloadIdx++) {\n    data.expectedPayload.push_back(createFullCpuData(options.payloadSize));\n    data.expectedPayloadMetadata.push_back(\n        std::string(options.metadataSize, 0x42));\n    data.temporaryPayload.push_back(createEmptyCpuData(options.payloadSize));\n  }\n  data.numTensors = options.numTensors;\n  data.tensorSize = options.tensorSize;\n  data.tensorType = options.tensorType;\n  for (size_t tensorIdx = 0; tensorIdx < options.numTensors; tensorIdx++) {\n    data.expectedTensorMetadata.push_back(\n        std::string(options.metadataSize, 0x42));\n    if (options.tensorType == TensorType::kCpu) {\n      data.expectedCpuTensor.push_back(createFullCpuData(options.tensorSize));\n      data.temporaryCpuTensor.push_back(createEmptyCpuData(options.tensorSize));\n    } else if (options.tensorType == TensorType::kCuda) {\n      data.expectedCudaTensor.push_back(createFullCudaData(options.tensorSize));\n      data.temporaryCudaTensor.push_back(\n          createEmptyCudaData(options.tensorSize));\n      data.cudaStream = createCudaStream();\n    } else {\n      TP_THROW_ASSERT() << \"Unknown tensor type\";\n    }\n  }\n  data.cudaSyncPeriod = options.cudaSyncPeriod;\n  data.expectedMetadata = std::string(options.metadataSize, 0x42);\n\n  Measurements measurements;\n  measurements.reserve(options.numRoundTrips);\n\n  std::shared_ptr<Context> context = std::make_shared<Context>();\n  auto transportContext =\n      TensorpipeTransportRegistry().create(options.transport);\n  validateTransportContext(transportContext);\n  
context->registerTransport(0, options.transport, transportContext);\n\n  auto channelContext = TensorpipeChannelRegistry().create(options.channel);\n  validateChannelContext(channelContext);\n  context->registerChannel(0, options.channel, channelContext);\n\n  std::promise<std::shared_ptr<Pipe>> pipeProm;\n  std::shared_ptr<Listener> listener = context->listen({addr});\n  listener->accept([&](const Error& error, std::shared_ptr<Pipe> pipe) {\n    TP_THROW_ASSERT_IF(error) << error.what();\n    pipeProm.set_value(std::move(pipe));\n  });\n  std::shared_ptr<Pipe> pipe = pipeProm.get_future().get();\n\n#if USE_NCCL\n  std::promise<ncclUniqueId> uniqueIdProm;\n  pipe->readDescriptor([&](const Error& error, Descriptor descriptor) {\n    TP_THROW_ASSERT_IF(error) << error.what();\n    uniqueIdProm.set_value(\n        *reinterpret_cast<const ncclUniqueId*>(descriptor.metadata.c_str()));\n  });\n  ncclUniqueId uniqueId = uniqueIdProm.get_future().get();\n\n  data.ncclComm = createNcclComm(/*rank=*/0, /*worldSize=*/2, uniqueId);\n#endif\n\n  std::promise<void> doneProm;\n  serverPongPingNonBlock(\n      std::move(pipe), numWarmUps, numRoundTrips, doneProm, data, measurements);\n\n  doneProm.get_future().get();\n  listener.reset();\n  context->join();\n}\n\nstatic void clientPingPongNonBlock(\n    std::shared_ptr<Pipe> pipe,\n    int& numWarmUps,\n    int& numRoundTrips,\n    std::promise<void>& doneProm,\n    Data& data,\n    MultiDeviceMeasurements& measurements) {\n#if USE_NCCL\n  for (int iterIdx = 0; iterIdx < numWarmUps + numRoundTrips; iterIdx++) {\n    if (iterIdx >= numWarmUps) {\n      measurements.cpu.markStart();\n      if ((iterIdx - numWarmUps) % data.cudaSyncPeriod == 0) {\n        measurements.cuda.markStart();\n      }\n    }\n    TP_NCCL_CHECK(ncclSend(\n        data.expectedCudaTensor[0].get(),\n        data.tensorSize,\n        ncclInt8,\n        0,\n        data.ncclComm.get(),\n        data.cudaStream.get()));\n    TP_NCCL_CHECK(ncclRecv(\n        
data.temporaryCudaTensor[0].get(),\n        data.tensorSize,\n        ncclInt8,\n        0,\n        data.ncclComm.get(),\n        data.cudaStream.get()));\n    if (iterIdx >= numWarmUps) {\n      measurements.cpu.markStop();\n      if ((iterIdx - numWarmUps + 1) % data.cudaSyncPeriod == 0) {\n        TP_CUDA_CHECK(cudaStreamSynchronize(data.cudaStream.get()));\n        measurements.cuda.markStop(data.cudaSyncPeriod);\n      }\n    }\n  }\n  printMultiDeviceMeasurements(measurements, data.payloadSize);\n  doneProm.set_value();\n  return;\n#endif // USE_NCCL\n  if (numWarmUps == 0) {\n    measurements.cpu.markStart();\n    if (numRoundTrips % data.cudaSyncPeriod == 0) {\n      measurements.cuda.markStart();\n    }\n  }\n  Message message;\n  message.metadata = data.expectedMetadata;\n  if (data.payloadSize > 0) {\n    for (size_t payloadIdx = 0; payloadIdx < data.numPayloads; payloadIdx++) {\n      Message::Payload payload;\n      payload.data = data.expectedPayload[payloadIdx].get();\n      payload.length = data.payloadSize;\n      message.payloads.push_back(std::move(payload));\n    }\n  } else {\n    TP_DCHECK_EQ(message.payloads.size(), 0);\n  }\n  if (data.tensorSize > 0) {\n    for (size_t tensorIdx = 0; tensorIdx < data.numTensors; tensorIdx++) {\n      Message::Tensor tensor;\n      tensor.length = data.tensorSize;\n      if (data.tensorType == TensorType::kCpu) {\n        tensor.buffer =\n            CpuBuffer{.ptr = data.expectedCpuTensor[tensorIdx].get()};\n        tensor.targetDevice = Device(kCpuDeviceType, 0);\n      } else if (data.tensorType == TensorType::kCuda) {\n        tensor.buffer = CudaBuffer{\n            .ptr = data.expectedCudaTensor[tensorIdx].get(),\n            .stream = data.cudaStream.get(),\n        };\n        tensor.targetDevice = Device(kCudaDeviceType, 0);\n      } else {\n        TP_THROW_ASSERT() << \"Unknown tensor type\";\n      }\n      message.tensors.push_back(std::move(tensor));\n    }\n  } else {\n    
TP_DCHECK_EQ(message.tensors.size(), 0);\n  }\n  pipe->write(\n      std::move(message),\n      [pipe, &numWarmUps, &numRoundTrips, &doneProm, &data, &measurements](\n          const Error& error) {\n        TP_THROW_ASSERT_IF(error) << error.what();\n        pipe->readDescriptor([pipe,\n                              &numWarmUps,\n                              &numRoundTrips,\n                              &doneProm,\n                              &data,\n                              &measurements](\n                                 const Error& error, Descriptor descriptor) {\n          TP_THROW_ASSERT_IF(error) << error.what();\n\n          Allocation allocation;\n          TP_DCHECK_EQ(descriptor.metadata, data.expectedMetadata);\n          if (data.payloadSize > 0) {\n            TP_DCHECK_EQ(descriptor.payloads.size(), data.numPayloads);\n            allocation.payloads.resize(data.numPayloads);\n            for (size_t payloadIdx = 0; payloadIdx < data.numPayloads;\n                 payloadIdx++) {\n              TP_DCHECK_EQ(\n                  descriptor.payloads[payloadIdx].metadata,\n                  data.expectedPayloadMetadata[payloadIdx]);\n              TP_DCHECK_EQ(\n                  descriptor.payloads[payloadIdx].length, data.payloadSize);\n              allocation.payloads[payloadIdx].data =\n                  data.temporaryPayload[payloadIdx].get();\n            }\n          } else {\n            TP_DCHECK_EQ(descriptor.payloads.size(), 0);\n          }\n          if (data.tensorSize > 0) {\n            TP_DCHECK_EQ(descriptor.tensors.size(), data.numTensors);\n            allocation.tensors.resize(data.numTensors);\n            for (size_t tensorIdx = 0; tensorIdx < data.numTensors;\n                 tensorIdx++) {\n              TP_DCHECK_EQ(\n                  descriptor.tensors[tensorIdx].metadata,\n                  data.expectedTensorMetadata[tensorIdx]);\n              TP_DCHECK_EQ(\n                  
descriptor.tensors[tensorIdx].length, data.tensorSize);\n              if (data.tensorType == TensorType::kCpu) {\n                allocation.tensors[tensorIdx].buffer = CpuBuffer{\n                    .ptr = data.temporaryCpuTensor[tensorIdx].get(),\n                };\n              } else if (data.tensorType == TensorType::kCuda) {\n                allocation.tensors[tensorIdx].buffer = CudaBuffer{\n                    .ptr = data.temporaryCudaTensor[tensorIdx].get(),\n                    .stream = data.cudaStream.get(),\n                };\n              } else {\n                TP_THROW_ASSERT() << \"Unknown tensor type\";\n              }\n            }\n          } else {\n            TP_DCHECK_EQ(descriptor.tensors.size(), 0);\n          }\n          pipe->read(\n              allocation,\n              [pipe,\n               &numWarmUps,\n               &numRoundTrips,\n               &doneProm,\n               &data,\n               &measurements,\n               descriptor{std::move(descriptor)},\n               allocation](const Error& error) {\n                if (numWarmUps == 0) {\n                  measurements.cpu.markStop();\n                  if ((numRoundTrips - 1) % data.cudaSyncPeriod == 0) {\n                    TP_CUDA_CHECK(cudaStreamSynchronize(data.cudaStream.get()));\n                    measurements.cuda.markStop(data.cudaSyncPeriod);\n                  }\n                }\n                TP_THROW_ASSERT_IF(error) << error.what();\n                if (data.payloadSize > 0) {\n                  TP_DCHECK_EQ(allocation.payloads.size(), data.numPayloads);\n                  for (size_t payloadIdx = 0; payloadIdx < data.numPayloads;\n                       payloadIdx++) {\n                    TP_DCHECK_EQ(\n                        memcmp(\n                            allocation.payloads[payloadIdx].data,\n                            data.expectedPayload[payloadIdx].get(),\n                            
descriptor.payloads[payloadIdx].length),\n                        0);\n                  }\n                } else {\n                  TP_DCHECK_EQ(allocation.payloads.size(), 0);\n                }\n                if (data.tensorSize > 0) {\n                  TP_DCHECK_EQ(allocation.tensors.size(), data.numTensors);\n                  for (size_t tensorIdx = 0; tensorIdx < data.numTensors;\n                       tensorIdx++) {\n                    if (data.tensorType == TensorType::kCpu) {\n                      TP_DCHECK_EQ(\n                          memcmp(\n                              allocation.tensors[tensorIdx]\n                                  .buffer.unwrap<CpuBuffer>()\n                                  .ptr,\n                              data.expectedCpuTensor[tensorIdx].get(),\n                              descriptor.tensors[tensorIdx].length),\n                          0);\n                    } else if (data.tensorType == TensorType::kCuda) {\n                      // No (easy) way to do a memcmp with CUDA, I\n                      // believe...\n                    } else {\n                      TP_THROW_ASSERT() << \"Unknown tensor type\";\n                    }\n                  }\n                } else {\n                  TP_DCHECK_EQ(allocation.tensors.size(), 0);\n                }\n                if (numWarmUps > 0) {\n                  numWarmUps -= 1;\n                } else {\n                  numRoundTrips -= 1;\n                }\n                if (numRoundTrips > 0) {\n                  clientPingPongNonBlock(\n                      pipe,\n                      numWarmUps,\n                      numRoundTrips,\n                      doneProm,\n                      data,\n                      measurements);\n                } else {\n                  printMultiDeviceMeasurements(measurements, data.payloadSize);\n                  doneProm.set_value();\n                }\n              });\n        });\n      
});\n}\n\n// Start with sending ping\nstatic void runClient(const Options& options) {\n  std::string addr = options.address;\n  int numWarmUps = kNumWarmUpRounds;\n  int numRoundTrips = options.numRoundTrips;\n\n  Data data;\n  data.numPayloads = options.numPayloads;\n  data.payloadSize = options.payloadSize;\n  for (size_t payloadIdx = 0; payloadIdx < options.numPayloads; payloadIdx++) {\n    data.expectedPayload.push_back(createFullCpuData(options.payloadSize));\n    data.expectedPayloadMetadata.push_back(\n        std::string(options.metadataSize, 0x42));\n    data.temporaryPayload.push_back(createEmptyCpuData(options.payloadSize));\n  }\n  data.numTensors = options.numTensors;\n  data.tensorSize = options.tensorSize;\n  data.tensorType = options.tensorType;\n  for (size_t tensorIdx = 0; tensorIdx < options.numTensors; tensorIdx++) {\n    data.expectedTensorMetadata.push_back(\n        std::string(options.metadataSize, 0x42));\n    if (data.tensorType == TensorType::kCpu) {\n      data.expectedCpuTensor.push_back(createFullCpuData(options.tensorSize));\n      data.temporaryCpuTensor.push_back(createEmptyCpuData(options.tensorSize));\n    } else if (data.tensorType == TensorType::kCuda) {\n      data.expectedCudaTensor.push_back(createFullCudaData(options.tensorSize));\n      data.temporaryCudaTensor.push_back(\n          createEmptyCudaData(options.tensorSize));\n      data.cudaStream = createCudaStream();\n    } else {\n      TP_THROW_ASSERT() << \"Unknown tensor type\";\n    }\n  }\n  data.cudaSyncPeriod = options.cudaSyncPeriod;\n  data.expectedMetadata = std::string(options.metadataSize, 0x42);\n\n  MultiDeviceMeasurements measurements;\n  measurements.cpu.reserve(options.numRoundTrips);\n  measurements.cuda.reserve(options.numRoundTrips / data.cudaSyncPeriod);\n\n  std::shared_ptr<Context> context = std::make_shared<Context>();\n  auto transportContext =\n      TensorpipeTransportRegistry().create(options.transport);\n  
validateTransportContext(transportContext);\n  context->registerTransport(0, options.transport, transportContext);\n\n  auto channelContext = TensorpipeChannelRegistry().create(options.channel);\n  validateChannelContext(channelContext);\n  context->registerChannel(0, options.channel, channelContext);\n\n  std::shared_ptr<Pipe> pipe = context->connect(addr);\n\n#if USE_NCCL\n  ncclUniqueId uniqueId;\n  TP_NCCL_CHECK(ncclGetUniqueId(&uniqueId));\n  Message message;\n  message.metadata = std::string(\n      reinterpret_cast<char*>(&uniqueId),\n      reinterpret_cast<char*>(&uniqueId) + sizeof(ncclUniqueId));\n  std::promise<void> promise;\n  pipe->write(std::move(message), [&](const Error& error) {\n    TP_THROW_ASSERT_IF(error) << error.what();\n    promise.set_value();\n  });\n  promise.get_future().get();\n\n  data.ncclComm = createNcclComm(/*rank=*/1, /*worldSize=*/2, uniqueId);\n#endif // USE_NCCL\n\n  std::promise<void> doneProm;\n  clientPingPongNonBlock(\n      std::move(pipe), numWarmUps, numRoundTrips, doneProm, data, measurements);\n\n  doneProm.get_future().get();\n  context->join();\n}\n\nint main(int argc, char** argv) {\n  struct Options x = parseOptions(argc, argv);\n  std::cout << \"mode = \" << x.mode << \"\\n\";\n  std::cout << \"transport = \" << x.transport << \"\\n\";\n  std::cout << \"channel = \" << x.channel << \"\\n\";\n  std::cout << \"address = \" << x.address << \"\\n\";\n  std::cout << \"num_round_trips = \" << x.numRoundTrips << \"\\n\";\n  std::cout << \"num_payloads = \" << x.numPayloads << \"\\n\";\n  std::cout << \"payload_size = \" << x.payloadSize << \"\\n\";\n  std::cout << \"num_tensors = \" << x.numTensors << \"\\n\";\n  std::cout << \"tensor_size = \" << x.tensorSize << \"\\n\";\n  std::cout << \"tensor_type = \"\n            << (x.tensorType == TensorType::kCpu ? 
\"cpu\" : \"cuda\") << \"\\n\";\n  std::cout << \"metadata_size = \" << x.metadataSize << \"\\n\";\n\n  if (x.mode == \"listen\") {\n    runServer(x);\n  } else if (x.mode == \"connect\") {\n    runClient(x);\n  } else {\n    // Should never be here\n    TP_THROW_ASSERT() << \"unknown mode: \" << x.mode;\n  }\n\n  return 0;\n}\n"
  },
  {
    "path": "tensorpipe/benchmark/benchmark_transport.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <cstring>\n\n#include <future>\n\n#include <tensorpipe/benchmark/measurements.h>\n#include <tensorpipe/benchmark/options.h>\n#include <tensorpipe/benchmark/transport_registry.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/transport/connection.h>\n#include <tensorpipe/transport/listener.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::benchmark;\nusing namespace tensorpipe::transport;\n\nstruct Data {\n  std::unique_ptr<uint8_t[]> expected;\n  std::unique_ptr<uint8_t[]> temporary;\n  size_t size;\n};\n\nstatic void printMeasurements(Measurements& measurements, size_t dataLen) {\n  measurements.sort();\n  fprintf(\n      stderr,\n      \"%-15s %-15s %-12s %-7s %-7s %-7s %-7s\\n\",\n      \"chunk-size\",\n      \"# ping-pong\",\n      \"avg (usec)\",\n      \"p50\",\n      \"p75\",\n      \"p90\",\n      \"p95\");\n  fprintf(\n      stderr,\n      \"%-15lu %-15lu %-12.3f %-7.3f %-7.3f %-7.3f %-7.3f\\n\",\n      dataLen,\n      measurements.size(),\n      measurements.sum().count() / (float)measurements.size() / 1000.0,\n      measurements.percentile(0.50).count() / 1000.0,\n      measurements.percentile(0.75).count() / 1000.0,\n      measurements.percentile(0.90).count() / 1000.0,\n      measurements.percentile(0.95).count() / 1000.0);\n}\n\nstatic std::unique_ptr<uint8_t[]> createData(const int size) {\n  auto data = std::make_unique<uint8_t[]>(size);\n  // Generate fixed data for validation between peers\n  for (int i = 0; i < size; i++) {\n    data[i] = (i >> 8) ^ (i & 0xff);\n  }\n  return data;\n}\n\nstatic void serverPongPingNonBlock(\n    std::shared_ptr<Connection> conn,\n    int& numRoundTrips,\n    std::promise<void>& doneProm,\n    Data& data,\n    Measurements& measurements) {\n 
 conn->read(\n      data.temporary.get(),\n      data.size,\n      [conn, &numRoundTrips, &doneProm, &data, &measurements](\n          const Error& error, const void* ptr, size_t len) {\n        TP_THROW_ASSERT_IF(error) << error.what();\n        TP_DCHECK_EQ(len, data.size);\n        TP_DCHECK_EQ(memcmp(ptr, data.expected.get(), len), 0);\n        conn->write(\n            data.temporary.get(),\n            data.size,\n            [conn, &numRoundTrips, &doneProm, &data, &measurements](\n                const Error& error) {\n              TP_THROW_ASSERT_IF(error) << error.what();\n              if (--numRoundTrips > 0) {\n                serverPongPingNonBlock(\n                    conn, numRoundTrips, doneProm, data, measurements);\n              } else {\n                doneProm.set_value();\n              }\n            });\n      });\n}\n\n// Start with receiving ping\nstatic void runServer(const Options& options) {\n  std::string addr = options.address;\n  int numRoundTrips = options.numRoundTrips;\n  Data data = {\n      createData(options.payloadSize),\n      std::make_unique<uint8_t[]>(options.payloadSize),\n      options.payloadSize};\n  Measurements measurements;\n  measurements.reserve(options.numRoundTrips);\n\n  std::shared_ptr<transport::Context> context;\n  context = TensorpipeTransportRegistry().create(options.transport);\n  validateTransportContext(context);\n\n  std::promise<std::shared_ptr<Connection>> connProm;\n  std::shared_ptr<transport::Listener> listener = context->listen(addr);\n  listener->accept([&](const Error& error, std::shared_ptr<Connection> conn) {\n    TP_THROW_ASSERT_IF(error) << error.what();\n    connProm.set_value(std::move(conn));\n  });\n  std::shared_ptr<Connection> conn = connProm.get_future().get();\n\n  std::promise<void> doneProm;\n  serverPongPingNonBlock(\n      std::move(conn), numRoundTrips, doneProm, data, measurements);\n\n  doneProm.get_future().get();\n  context->join();\n}\n\nstatic void 
clientPingPongNonBlock(\n    std::shared_ptr<Connection> conn,\n    int& numRoundTrips,\n    std::promise<void>& doneProm,\n    Data& data,\n    Measurements& measurements) {\n  measurements.markStart();\n  conn->write(\n      data.expected.get(),\n      data.size,\n      [conn, &numRoundTrips, &doneProm, &data, &measurements](\n          const Error& error) {\n        TP_THROW_ASSERT_IF(error) << error.what();\n        conn->read(\n            data.temporary.get(),\n            data.size,\n            [conn, &numRoundTrips, &doneProm, &data, &measurements](\n                const Error& error, const void* ptr, size_t len) {\n              measurements.markStop();\n              TP_THROW_ASSERT_IF(error) << error.what();\n              TP_DCHECK_EQ(len, data.size);\n              TP_DCHECK_EQ(memcmp(ptr, data.expected.get(), len), 0);\n              if (--numRoundTrips > 0) {\n                clientPingPongNonBlock(\n                    conn, numRoundTrips, doneProm, data, measurements);\n              } else {\n                printMeasurements(measurements, data.size);\n                doneProm.set_value();\n              }\n            });\n      });\n}\n\n// Start with sending ping\nstatic void runClient(const Options& options) {\n  std::string addr = options.address;\n  int numRoundTrips = options.numRoundTrips;\n  Data data = {\n      createData(options.payloadSize),\n      std::make_unique<uint8_t[]>(options.payloadSize),\n      options.payloadSize};\n  Measurements measurements;\n  measurements.reserve(options.numRoundTrips);\n\n  std::shared_ptr<transport::Context> context;\n  context = TensorpipeTransportRegistry().create(options.transport);\n  validateTransportContext(context);\n  std::shared_ptr<Connection> conn = context->connect(addr);\n\n  std::promise<void> doneProm;\n  clientPingPongNonBlock(\n      std::move(conn), numRoundTrips, doneProm, data, measurements);\n\n  doneProm.get_future().get();\n  context->join();\n}\n\nint main(int argc, char** 
argv) {\n  struct Options x = parseOptions(argc, argv);\n  std::cout << \"mode = \" << x.mode << \"\\n\";\n  std::cout << \"transport = \" << x.transport << \"\\n\";\n  std::cout << \"address = \" << x.address << \"\\n\";\n  std::cout << \"num_round_trips = \" << x.numRoundTrips << \"\\n\";\n  std::cout << \"payload_size = \" << x.payloadSize << \"\\n\";\n\n  if (x.mode == \"listen\") {\n    runServer(x);\n  } else if (x.mode == \"connect\") {\n    runClient(x);\n  } else {\n    // Should never be here\n    TP_THROW_ASSERT() << \"unknown mode: \" << x.mode;\n  }\n\n  return 0;\n}\n"
  },
  {
    "path": "tensorpipe/benchmark/channel_registry.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/benchmark/channel_registry.h>\n\n#include <tensorpipe/tensorpipe.h>\n#include <tensorpipe/tensorpipe_cuda.h>\n\nTP_DEFINE_SHARED_REGISTRY(\n    TensorpipeChannelRegistry,\n    tensorpipe::channel::Context);\n\n// BASIC\n\nstd::shared_ptr<tensorpipe::channel::Context> makeBasicChannel() {\n  return tensorpipe::channel::basic::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeChannelRegistry, basic, makeBasicChannel);\n\n// CMA\n\n#if TENSORPIPE_HAS_CMA_CHANNEL\nstd::shared_ptr<tensorpipe::channel::Context> makeCmaChannel() {\n  return tensorpipe::channel::cma::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeChannelRegistry, cma, makeCmaChannel);\n#endif // TENSORPIPE_HAS_CMA_CHANNEL\n\n// MPT\n\nstd::shared_ptr<tensorpipe::channel::Context> makeMptChannel() {\n  throw std::runtime_error(\"mtp channel requires arguments\");\n}\n\nTP_REGISTER_CREATOR(TensorpipeChannelRegistry, mpt, makeMptChannel);\n\n// XTH\n\nstd::shared_ptr<tensorpipe::channel::Context> makeXthChannel() {\n  return tensorpipe::channel::xth::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeChannelRegistry, xth, makeXthChannel);\n\n// CUDA XTH\n\nstd::shared_ptr<tensorpipe::channel::Context> makeCudaXthChannel() {\n  return tensorpipe::channel::cuda_xth::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_xth, makeCudaXthChannel);\n\n// CUDA BASIC\n\nstd::shared_ptr<tensorpipe::channel::Context> makeCudaBasicChannel() {\n  return tensorpipe::channel::cuda_basic::create(\n      tensorpipe::channel::basic::create());\n}\n\nTP_REGISTER_CREATOR(\n    TensorpipeChannelRegistry,\n    cuda_basic,\n    makeCudaBasicChannel);\n\n// CUDA IPC\n\n#if TENSORPIPE_HAS_CUDA_IPC_CHANNEL\nstd::shared_ptr<tensorpipe::channel::Context> 
makeCudaIpcChannel() {\n  return tensorpipe::channel::cuda_ipc::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_ipc, makeCudaIpcChannel);\n#endif // TENSORPIPE_HAS_CUDA_IPC_CHANNEL\n\n// CUDA GDR\n\n#if TENSORPIPE_HAS_CUDA_GDR_CHANNEL\nstd::shared_ptr<tensorpipe::channel::Context> makeCudaGdrChannel() {\n  return tensorpipe::channel::cuda_gdr::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeChannelRegistry, cuda_gdr, makeCudaGdrChannel);\n#endif // TENSORPIPE_HAS_CUDA_GDR_CHANNEL\n\nvoid validateChannelContext(\n    std::shared_ptr<tensorpipe::channel::Context> context) {\n  if (!context) {\n    auto keys = TensorpipeChannelRegistry().keys();\n    std::cout\n        << \"The channel you passed in is not supported. The following channels are valid: \";\n    for (const auto& key : keys) {\n      std::cout << key << \", \";\n    }\n    std::cout << \"\\n\";\n    exit(EXIT_FAILURE);\n  }\n}\n"
  },
  {
    "path": "tensorpipe/benchmark/channel_registry.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/benchmark/registry.h>\n#include <tensorpipe/channel/context.h>\n\nTP_DECLARE_SHARED_REGISTRY(\n    TensorpipeChannelRegistry,\n    tensorpipe::channel::Context);\n\nvoid validateChannelContext(\n    std::shared_ptr<tensorpipe::channel::Context> context);\n"
  },
  {
    "path": "tensorpipe/benchmark/measurements.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <algorithm>\n#include <chrono>\n#include <vector>\n\nnamespace tensorpipe {\nnamespace benchmark {\n\nclass Measurements {\n  using clock = std::chrono::high_resolution_clock;\n  using nanoseconds = std::chrono::nanoseconds;\n\n public:\n  void markStart() {\n    start_ = clock::now();\n  }\n\n  void markStop(size_t count = 1) {\n    samples_.push_back((clock::now() - start_) / count);\n  }\n\n  void sort() {\n    std::sort(samples_.begin(), samples_.end());\n  }\n\n  void reserve(size_t capacity) {\n    samples_.reserve(capacity);\n  }\n\n  size_t size() const {\n    return samples_.size();\n  }\n\n  nanoseconds sum() const {\n    nanoseconds sum{0};\n    for (const auto& sample : samples_) {\n      sum += sample;\n    }\n    return sum;\n  }\n\n  nanoseconds percentile(float f) const {\n    return samples_[static_cast<size_t>(f * samples_.size())];\n  }\n\n private:\n  clock::time_point start_;\n  std::vector<nanoseconds> samples_;\n};\n\n} // namespace benchmark\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/benchmark/options.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/benchmark/options.h>\n\n#include <getopt.h>\n#include <stdio.h>\n#include <stdlib.h>\n\nnamespace tensorpipe {\nnamespace benchmark {\n\nstatic void usage(int status, const char* argv0) {\n  if (status != EXIT_SUCCESS) {\n    fprintf(stderr, \"`%s --help' for more information.\\n\", argv0);\n    exit(status);\n  }\n\n  fprintf(stderr, \"Usage: %s [OPTIONS]\\n\", argv0);\n#define X(x) fputs(x \"\\n\", stderr);\n  X(\"\");\n  X(\"--mode=MODE                      Running mode [listen|connect]\");\n  X(\"--transport=TRANSPORT            Transport backend [shm|uv]\");\n  X(\"--channel=CHANNEL                Channel backend [basic]\");\n  X(\"--address=ADDRESS                Address to listen or connect to\");\n  X(\"--num-round-trips=NUM            Number of write/read pairs to perform\");\n  X(\"--num-payloads=NUM [optional]    Number of payloads of each write/read pair\");\n  X(\"--payload-size=SIZE [optional]   Size of payload of each write/read pair\");\n  X(\"--num-tensors=NUM [optional]     Number of tensors of each write/read pair\");\n  X(\"--tensor-size=SIZE [optional]    Size of tensor of each write/read pair\");\n  X(\"--tensor-type=TYPE [optional]    Type of tensor (cpu or cuda)\");\n  X(\"--metadata-size=SIZE [optional]  Size of metadata of each write/read pair\");\n  X(\"--cuda-sync-period=NUM [optiona] Number of round-trips between two stream syncs\");\n\n  exit(status);\n}\n\nstatic void validateOptions(Options options, const char* argv0) {\n  int status = EXIT_SUCCESS;\n  if (options.mode.empty()) {\n    fprintf(stderr, \"Missing argument: --mode must be set\\n\");\n    status = EXIT_FAILURE;\n  }\n  if (options.transport.empty()) {\n    fprintf(stderr, \"Missing argument: --transport must be 
set\\n\");\n    status = EXIT_FAILURE;\n  }\n  if (options.address.empty()) {\n    fprintf(stderr, \"Missing argument: --address must be set\\n\");\n    status = EXIT_FAILURE;\n  }\n  if (options.numRoundTrips <= 0) {\n    fprintf(stderr, \"Missing argument: --num-round-trips must be set\\n\");\n    status = EXIT_FAILURE;\n  }\n  if (status != EXIT_SUCCESS) {\n    usage(status, argv0);\n  }\n}\n\nstruct Options parseOptions(int argc, char** argv) {\n  struct Options options;\n  int opt;\n  int flag = -1;\n\n  enum Flags : int {\n    MODE,\n    TRANSPORT,\n    CHANNEL,\n    ADDRESS,\n    NUM_ROUND_TRIPS,\n    NUM_PAYLOADS,\n    PAYLOAD_SIZE,\n    NUM_TENSORS,\n    TENSOR_SIZE,\n    TENSOR_TYPE,\n    METADATA_SIZE,\n    CUDA_SYNC_PERIOD,\n    HELP,\n  };\n\n  static struct option longOptions[] = {\n      {\"mode\", required_argument, &flag, MODE},\n      {\"transport\", required_argument, &flag, TRANSPORT},\n      {\"channel\", required_argument, &flag, CHANNEL},\n      {\"address\", required_argument, &flag, ADDRESS},\n      {\"num-round-trips\", required_argument, &flag, NUM_ROUND_TRIPS},\n      {\"num-payloads\", required_argument, &flag, NUM_PAYLOADS},\n      {\"payload-size\", required_argument, &flag, PAYLOAD_SIZE},\n      {\"num-tensors\", required_argument, &flag, NUM_TENSORS},\n      {\"tensor-size\", required_argument, &flag, TENSOR_SIZE},\n      {\"tensor-type\", required_argument, &flag, TENSOR_TYPE},\n      {\"metadata-size\", required_argument, &flag, METADATA_SIZE},\n      {\"cuda-sync-period\", required_argument, &flag, CUDA_SYNC_PERIOD},\n      {\"help\", no_argument, &flag, HELP},\n      {nullptr, 0, nullptr, 0}};\n\n  while (1) {\n    opt = getopt_long(argc, argv, \"\", longOptions, nullptr);\n    if (opt == -1) {\n      break;\n    }\n    if (opt != 0) {\n      usage(EXIT_FAILURE, argv[0]);\n      break;\n    }\n    switch (flag) {\n      case MODE:\n        options.mode = std::string(optarg);\n        if (options.mode != \"listen\" && 
options.mode != \"connect\") {\n          fprintf(stderr, \"Error:\\n\");\n          fprintf(stderr, \"  --mode must be [listen|connect]\\n\");\n          exit(EXIT_FAILURE);\n        }\n        break;\n      case TRANSPORT:\n        options.transport = std::string(optarg);\n        break;\n      case CHANNEL:\n        options.channel = std::string(optarg);\n        break;\n      case ADDRESS:\n        options.address = std::string(optarg);\n        break;\n      case NUM_ROUND_TRIPS:\n        options.numRoundTrips = std::strtol(optarg, nullptr, 10);\n        break;\n      case NUM_PAYLOADS:\n        options.numPayloads = std::strtoull(optarg, nullptr, 10);\n        break;\n      case PAYLOAD_SIZE:\n        options.payloadSize = std::strtoull(optarg, nullptr, 10);\n        break;\n      case NUM_TENSORS:\n        options.numTensors = std::strtoull(optarg, nullptr, 10);\n        break;\n      case TENSOR_SIZE:\n        options.tensorSize = std::strtoull(optarg, nullptr, 10);\n        break;\n      case TENSOR_TYPE:\n        if (std::string(optarg) == \"cpu\") {\n          options.tensorType = TensorType::kCpu;\n        } else if (std::string(optarg) == \"cuda\") {\n          options.tensorType = TensorType::kCuda;\n        } else {\n          fprintf(stderr, \"Error:\\n\");\n          fprintf(stderr, \"  --tensor-type must be [cpu|cuda]\\n\");\n          exit(EXIT_FAILURE);\n        }\n        break;\n      case METADATA_SIZE:\n        options.metadataSize = std::strtoull(optarg, nullptr, 10);\n        break;\n      case CUDA_SYNC_PERIOD:\n        options.cudaSyncPeriod = std::strtoull(optarg, nullptr, 10);\n        break;\n      case HELP:\n        usage(EXIT_SUCCESS, argv[0]);\n        break;\n      default:\n        usage(EXIT_FAILURE, argv[0]);\n        break;\n    }\n  }\n\n  validateOptions(options, argv[0]);\n\n  return options;\n}\n\n} // namespace benchmark\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/benchmark/options.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace benchmark {\n\nenum class TensorType {\n  kCpu,\n  kCuda,\n};\n\nstruct Options {\n  std::string mode; // server or client\n  std::string transport; // shm or uv\n  std::string channel; // basic\n  std::string address; // address for listen or connect\n  int numRoundTrips{0}; // number of write/read pairs\n  size_t numPayloads{0};\n  size_t payloadSize{0};\n  size_t numTensors{0};\n  size_t tensorSize{0};\n  TensorType tensorType{TensorType::kCpu};\n  size_t metadataSize{0};\n  size_t cudaSyncPeriod{1};\n};\n\nstruct Options parseOptions(int argc, char** argv);\n\n} // namespace benchmark\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/benchmark/registry.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n// NB: This Registry works poorly when you have other namespaces.\n\n/**\n * Simple registry implementation that uses static variables to\n * register object creators during program initialization time. This registry\n * implementation is largely borrowed from the PyTorch registry utility in file\n * pytorch/c10/util/Registry.h.\n */\n\n#pragma once\n\n#include <algorithm>\n#include <cstdio>\n#include <cstdlib>\n#include <functional>\n#include <iostream>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <vector>\n\nnamespace tensorpipe {\n\n/**\n * @brief A template class that allows one to register classes by keys.\n *\n * The keys are usually a std::string specifying the name, but can be anything\n * that can be used in a std::map.\n *\n * You should most likely not use the Registry class explicitly, but use the\n * helper macros below to declare specific registries as well as registering\n * objects.\n */\ntemplate <class ObjectPtrType, class... Args>\nclass Registry {\n public:\n  typedef std::function<ObjectPtrType(Args...)> Creator;\n\n  Registry() : registry_() {}\n\n  // Adds a key and its associated creator to the desired registry. 
 If the key\n  // already exists in the registry, we simply replace the old creator\n  // with the new args for the key.\n  void registerCreator(std::string key, Creator creator) {\n    registry_[key] = creator;\n  }\n\n  // Allows you to register a key/Creator pair and provide a help message for\n  // the key as well.\n  void registerCreator(\n      std::string key,\n      Creator creator,\n      const std::string& helpMsg) {\n    registerCreator(key, creator);\n    helpMessage_[key] = helpMsg;\n  }\n\n  // Returns whether a particular key exists in the given registry.\n  inline bool has(std::string key) {\n    return (registry_.count(key) != 0);\n  }\n\n  // Given the key, create() invokes the creator with the provided args and\n  // returns the object that the creator function constructs.\n  ObjectPtrType create(std::string key, Args... args) {\n    if (registry_.count(key) == 0) {\n      // Returns nullptr if the key is not registered.\n      return nullptr;\n    }\n    return registry_[key](args...);\n  }\n\n  // Returns the registered keys as a std::vector.\n  std::vector<std::string> keys() const {\n    std::vector<std::string> keys;\n    for (const auto& it : registry_) {\n      keys.push_back(it.first);\n    }\n    return keys;\n  }\n\n  // Returns the help message for the key if one is provided.\n  inline const std::unordered_map<std::string, std::string>& helpMessage()\n      const {\n    return helpMessage_;\n  }\n\n  const char* helpMessage(std::string key) const {\n    auto it = helpMessage_.find(key);\n    if (it == helpMessage_.end()) {\n      return nullptr;\n    }\n    return it->second.c_str();\n  }\n\n private:\n  std::unordered_map<std::string, Creator> registry_;\n  std::unordered_map<std::string, std::string> helpMessage_;\n};\n\n// Registerer is a class template that simplifies Register-ing keys for a given\n// registry.\ntemplate <class ObjectPtrType, class... 
Args>\nclass Registerer {\n public:\n  explicit Registerer(\n      std::string key,\n      Registry<ObjectPtrType, Args...>& registry,\n      typename Registry<ObjectPtrType, Args...>::Creator creator,\n      const std::string& helpMsg = \"\") {\n    registry.registerCreator(key, creator, helpMsg);\n  }\n};\n\n// The following macros should be used to create/add to registries. Avoid\n// invoking the Registry class template functions directly.\n\n#define TP_CONCATENATE_IMPL(s1, s2) s1##s2\n#define TP_CONCATENATE(s1, s2) TP_CONCATENATE_IMPL(s1, s2)\n#define TP_ANONYMOUS_VARIABLE(str) TP_CONCATENATE(str, __LINE__)\n\n// Using the construct on first use idiom to avoid static order initialization\n// issue. Refer to this link for reference:\n// https://isocpp.org/wiki/faq/ctors#static-init-order-on-first-use\n#define TP_DEFINE_TYPED_REGISTRY(RegistryName, ObjectType, PtrType, ...)     \\\n  tensorpipe::Registry<PtrType<ObjectType>, ##__VA_ARGS__>& RegistryName() { \\\n    static tensorpipe::Registry<PtrType<ObjectType>, ##__VA_ARGS__>*         \\\n        registry =                                                           \\\n            new tensorpipe::Registry<PtrType<ObjectType>, ##__VA_ARGS__>();  \\\n    return *registry;                                                        \\\n  }\n\n#define TP_DECLARE_TYPED_REGISTRY(RegistryName, ObjectType, PtrType, ...)   \\\n  tensorpipe::Registry<PtrType<ObjectType>, ##__VA_ARGS__>& RegistryName(); \\\n  typedef tensorpipe::Registerer<PtrType<ObjectType>, ##__VA_ARGS__>        \\\n      Registerer##RegistryName\n\n#define TP_DEFINE_SHARED_REGISTRY(RegistryName, ObjectType, ...) \\\n  TP_DEFINE_TYPED_REGISTRY(                                      \\\n      RegistryName, ObjectType, std::shared_ptr, ##__VA_ARGS__)\n\n#define TP_DECLARE_SHARED_REGISTRY(RegistryName, ObjectType, ...) 
\\\n  TP_DECLARE_TYPED_REGISTRY(                                      \\\n      RegistryName, ObjectType, std::shared_ptr, ##__VA_ARGS__)\n\n#define TP_REGISTER_TYPED_CREATOR(RegistryName, key, ...)                  \\\n  static Registerer##RegistryName TP_ANONYMOUS_VARIABLE(g_##RegistryName)( \\\n      key, RegistryName(), ##__VA_ARGS__);\n\n#define TP_REGISTER_CREATOR(RegistryName, key, ...) \\\n  TP_REGISTER_TYPED_CREATOR(RegistryName, #key, __VA_ARGS__)\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/benchmark/transport_registry.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/benchmark/transport_registry.h>\n\n#include <tensorpipe/tensorpipe.h>\n\nTP_DEFINE_SHARED_REGISTRY(\n    TensorpipeTransportRegistry,\n    tensorpipe::transport::Context);\n\n// IBV\n\n#if TENSORPIPE_HAS_IBV_TRANSPORT\nstd::shared_ptr<tensorpipe::transport::Context> makeIbvContext() {\n  return tensorpipe::transport::ibv::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeTransportRegistry, ibv, makeIbvContext);\n#endif // TENSORPIPE_HAS_IBV_TRANSPORT\n\n// SHM\n\n#if TENSORPIPE_HAS_SHM_TRANSPORT\nstd::shared_ptr<tensorpipe::transport::Context> makeShmContext() {\n  return tensorpipe::transport::shm::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeTransportRegistry, shm, makeShmContext);\n#endif // TENSORPIPE_HAS_SHM_TRANSPORT\n\n// UV\n\nstd::shared_ptr<tensorpipe::transport::Context> makeUvContext() {\n  return tensorpipe::transport::uv::create();\n}\n\nTP_REGISTER_CREATOR(TensorpipeTransportRegistry, uv, makeUvContext);\n\nvoid validateTransportContext(\n    std::shared_ptr<tensorpipe::transport::Context> context) {\n  if (!context) {\n    auto keys = TensorpipeTransportRegistry().keys();\n    std::cout\n        << \"The transport you passed in is not supported. The following transports are valid: \";\n    for (const auto& key : keys) {\n      std::cout << key << \", \";\n    }\n    std::cout << \"\\n\";\n    exit(EXIT_FAILURE);\n  }\n}\n"
  },
  {
    "path": "tensorpipe/benchmark/transport_registry.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/benchmark/registry.h>\n#include <tensorpipe/transport/context.h>\n\nTP_DECLARE_SHARED_REGISTRY(\n    TensorpipeTransportRegistry,\n    tensorpipe::transport::Context);\n\nvoid validateTransportContext(\n    std::shared_ptr<tensorpipe::transport::Context> context);\n"
  },
  {
    "path": "tensorpipe/channel/basic/channel_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/basic/channel_impl.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <tensorpipe/channel/basic/context_impl.h>\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace basic {\n\nChannelImpl::ChannelImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::shared_ptr<transport::Connection> connection)\n    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      connection_(std::move(connection)) {}\n\nvoid ChannelImpl::initImplFromLoop() {\n  context_->enroll(*this);\n}\n\nvoid ChannelImpl::sendImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  SendOpIter opIter = sendOps_.emplaceBack(sequenceNumber);\n  SendOperation& op = *opIter;\n  op.ptr = buffer.unwrap<CpuBuffer>().ptr;\n  op.length = length;\n  op.callback = std::move(callback);\n\n  sendOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceSendOperation(\n    SendOpIter opIter,\n    SendOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the connection.\n  
sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::WRITING,\n      /*cond=*/!error_ && prevOpState >= SendOperation::WRITING,\n      /*actions=*/{&ChannelImpl::write});\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::WRITING,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/op.doneWriting,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n}\n\nvoid ChannelImpl::write(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing payload (#\"\n             << op.sequenceNumber << \")\";\n  connection_->write(\n      op.ptr, op.length, callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing payload (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneWriting = true;\n        impl.sendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callSendCallback(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::recvImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  RecvOpIter opIter = recvOps_.emplaceBack(sequenceNumber);\n  RecvOperation& op = *opIter;\n  op.ptr = buffer.unwrap<CpuBuffer>().ptr;\n  op.length = length;\n  op.callback = std::move(callback);\n\n  recvOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceRecvOperation(\n    RecvOpIter opIter,\n    RecvOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after 
previous op to ensure predictable and consistent ordering\n  // of read calls on the connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::READING,\n      /*cond=*/!error_ && prevOpState >= RecvOperation::READING,\n      /*actions=*/{&ChannelImpl::read});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/op.doneReading,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n}\n\nvoid ChannelImpl::read(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading payload (#\"\n             << op.sequenceNumber << \")\";\n  connection_->read(\n      op.ptr,\n      op.length,\n      callbackWrapper_([opIter](\n                           ChannelImpl& impl,\n                           const void* /* unused */,\n                           size_t /* unused */) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading payload (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReading = true;\n        impl.recvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callRecvCallback(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::handleErrorImpl() {\n  sendOps_.advanceAllOperations();\n  recvOps_.advanceAllOperations();\n\n  // Close the connection so that all current operations will be aborted. This\n  // will cause their callbacks to be invoked, and only then we'll invoke ours.\n  connection_->close();\n\n  context_->unenroll(*this);\n}\n\n} // namespace basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/basic/channel_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n#include <tensorpipe/common/state_machine.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace basic {\n\nclass ContextImpl;\n\nstruct SendOperation {\n  enum State { UNINITIALIZED, WRITING, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneWriting{false};\n\n  // Arguments at creation\n  const void* ptr;\n  size_t length;\n  TSendCallback callback;\n};\n\n// State capturing a single recv operation.\nstruct RecvOperation {\n  enum State { UNINITIALIZED, READING, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReading{false};\n\n  // Arguments at creation\n  void* ptr;\n  size_t length;\n  TRecvCallback callback;\n};\n\nclass ChannelImpl final\n    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  ChannelImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::shared_ptr<transport::Connection> connection);\n\n protected:\n  // Implement the entry points called by ChannelImplBoilerplate.\n  void initImplFromLoop() override;\n  void sendImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) override;\n  void recvImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TRecvCallback callback) override;\n  void handleErrorImpl() override;\n\n private:\n  const 
std::shared_ptr<transport::Connection> connection_;\n\n  OpsStateMachine<ChannelImpl, SendOperation> sendOps_{\n      *this,\n      &ChannelImpl::advanceSendOperation};\n  using SendOpIter = decltype(sendOps_)::Iter;\n  OpsStateMachine<ChannelImpl, RecvOperation> recvOps_{\n      *this,\n      &ChannelImpl::advanceRecvOperation};\n  using RecvOpIter = decltype(recvOps_)::Iter;\n\n  // State machines for send and recv ops.\n  void advanceSendOperation(\n      SendOpIter opIter,\n      SendOperation::State prevOpState);\n  void advanceRecvOperation(\n      RecvOpIter opIter,\n      RecvOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For send operations:\n  void write(SendOpIter opIter);\n  void callSendCallback(SendOpIter opIter);\n  // For recv operations:\n  void read(RecvOpIter opIter);\n  void callRecvCallback(RecvOpIter opIter);\n};\n\n} // namespace basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/basic/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/basic/context_impl.h>\n\n#include <functional>\n#include <utility>\n\n#include <tensorpipe/channel/basic/channel_impl.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace basic {\n\nstd::shared_ptr<ContextImpl> ContextImpl::create() {\n  std::unordered_map<Device, std::string> deviceDescriptors = {\n      {Device{kCpuDeviceType, 0}, \"any\"}};\n  return std::make_shared<ContextImpl>(std::move(deviceDescriptors));\n}\n\nContextImpl::ContextImpl(\n    std::unordered_map<Device, std::string> deviceDescriptors)\n    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(\n          std::move(deviceDescriptors)) {}\n\nstd::shared_ptr<Channel> ContextImpl::createChannel(\n    std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint /* unused */) {\n  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());\n  return createChannelInternal(std::move(connections[0]));\n}\n\nvoid ContextImpl::handleErrorImpl() {}\n\nvoid ContextImpl::joinImpl() {}\n\nbool ContextImpl::inLoop() const {\n  return loop_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  loop_.deferToLoop(std::move(fn));\n};\n\n} // namespace basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/basic/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/device.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace basic {\n\nclass ChannelImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create();\n\n  explicit ContextImpl(\n      std::unordered_map<Device, std::string> deviceDescriptors);\n\n  std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint);\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n\n private:\n  OnDemandDeferredExecutor loop_;\n};\n\n} // namespace basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/basic/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/basic/factory.h>\n\n#include <tensorpipe/channel/basic/channel_impl.h>\n#include <tensorpipe/channel/basic/context_impl.h>\n#include <tensorpipe/channel/context_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace basic {\n\nstd::shared_ptr<Context> create() {\n  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>();\n}\n\n} // namespace basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/basic/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/channel/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace basic {\n\nstd::shared_ptr<Context> create();\n\n} // namespace basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/channel.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <string>\n\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/common/buffer.h>\n#include <tensorpipe/common/error.h>\n\n// Channels are an out of band mechanism to transfer data between\n// processes. Examples include a direct address space to address space\n// memory copy on the same machine, or a GPU-to-GPU memory copy.\n//\n// Construction of a channel happens as follows.\n//\n//   1) During initialization of a pipe, the connecting peer sends its\n//      list of channel contexts and their device descriptors. The\n//      device descriptor is used to determine whether or not a\n//      channel can be used by a pair of peers.\n//   2) The listening side of the pipe compares the list it received\n//      its own list to determine the list of channels that should be used\n//      for the peers.\n//   3) For every channel that should be constructed, the listening\n//      side registers a slot with its low level listener. These slots\n//      uniquely identify inbound connections on this listener (by\n//      sending a word-sized indentifier immediately after connecting)\n//      and can be used to construct new connections. These slots are\n//      sent to the connecting side of the pipe, which then attempts\n//      to establish a new connection for every token.\n//   4) At this time, we have a new control connection for every\n//      channel that is about to be constructed. Both sides of the\n//      pipe can now create the channel instance using the newly\n//      created connection. Further initialization that needs to\n//      happen is defered to the channel implementation. 
We assume the\n//      channel is usable from the moment it is constructed.\n//\nnamespace tensorpipe {\nnamespace channel {\n\nusing TSendCallback = std::function<void(const Error&)>;\nusing TRecvCallback = std::function<void(const Error&)>;\n\n// Abstract base class for channel classes.\nclass Channel {\n public:\n  // Send memory region to peer.\n  virtual void send(Buffer buffer, size_t length, TSendCallback callback) = 0;\n\n  // Receive memory region from peer.\n  virtual void recv(Buffer buffer, size_t length, TRecvCallback callback) = 0;\n\n  // Tell the channel what its identifier is.\n  //\n  // This is only supposed to be called from the high-level pipe. It will only\n  // used for logging and debugging purposes.\n  virtual void setId(std::string id) = 0;\n\n  // Put the channel in a terminal state, aborting pending operations and\n  // rejecting future ones, and release its resources. This may be carried out\n  // asynchronously, in background.\n  virtual void close() = 0;\n\n  virtual ~Channel() = default;\n};\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/channel_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n#include <memory>\n#include <string>\n#include <type_traits>\n#include <utility>\n\n#include <tensorpipe/channel/channel.h>\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace channel {\n\ntemplate <typename TCtx, typename TChan>\nclass ChannelBoilerplate : public Channel {\n public:\n  template <typename... Args>\n  ChannelBoilerplate(\n      typename ChannelImplBoilerplate<TCtx, TChan>::ConstructorToken token,\n      std::shared_ptr<TCtx> context,\n      std::string id,\n      Args&&... args);\n\n  explicit ChannelBoilerplate(std::shared_ptr<TChan> channel);\n\n  ChannelBoilerplate(const ChannelBoilerplate&) = delete;\n  ChannelBoilerplate(ChannelBoilerplate&&) = delete;\n  ChannelBoilerplate& operator=(const ChannelBoilerplate&) = delete;\n  ChannelBoilerplate& operator=(ChannelBoilerplate&&) = delete;\n\n  // Perform a send operation.\n  void send(Buffer buffer, size_t length, TSendCallback callback) override;\n\n  // Queue a recv operation.\n  void recv(Buffer buffer, size_t length, TRecvCallback callback) override;\n\n  // Tell the connection what its identifier is.\n  void setId(std::string id) override;\n\n  // Shut down the connection and its resources.\n  void close() override;\n\n  ~ChannelBoilerplate() override;\n\n protected:\n  // Using a shared_ptr allows us to detach the lifetime of the implementation\n  // from the public object's one and perform the destruction asynchronously.\n  const std::shared_ptr<TChan> impl_;\n};\n\ntemplate <typename TCtx, typename TChan>\ntemplate <typename... 
Args>\nChannelBoilerplate<TCtx, TChan>::ChannelBoilerplate(\n    typename ChannelImplBoilerplate<TCtx, TChan>::ConstructorToken token,\n    std::shared_ptr<TCtx> context,\n    std::string id,\n    Args&&... args)\n    : impl_(std::make_shared<TChan>(\n          token,\n          std::move(context),\n          std::move(id),\n          std::forward<Args>(args)...)) {\n  static_assert(\n      std::is_base_of<ChannelImplBoilerplate<TCtx, TChan>, TChan>::value, \"\");\n  impl_->init();\n}\n\ntemplate <typename TCtx, typename TChan>\nChannelBoilerplate<TCtx, TChan>::ChannelBoilerplate(\n    std::shared_ptr<TChan> channel)\n    : impl_(std::move(channel)) {\n  static_assert(\n      std::is_base_of<ChannelImplBoilerplate<TCtx, TChan>, TChan>::value, \"\");\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelBoilerplate<TCtx, TChan>::send(\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static Error error = TP_CREATE_ERROR(ContextNotViableError);\n    callback(error);\n    return;\n  }\n  impl_->send(buffer, length, std::move(callback));\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelBoilerplate<TCtx, TChan>::recv(\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static Error error = TP_CREATE_ERROR(ContextNotViableError);\n    callback(error);\n    return;\n  }\n  impl_->recv(buffer, length, std::move(callback));\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelBoilerplate<TCtx, TChan>::setId(std::string id) {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->setId(std::move(id));\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelBoilerplate<TCtx, TChan>::close() {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->close();\n}\n\ntemplate <typename TCtx, 
typename TChan>\nChannelBoilerplate<TCtx, TChan>::~ChannelBoilerplate() {\n  close();\n}\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/channel_impl_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <tensorpipe/channel/channel.h>\n#include <tensorpipe/channel/error.h>\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/error_macros.h>\n\nnamespace tensorpipe {\nnamespace channel {\n\ntemplate <typename TCtx, typename TChan>\nclass ContextImplBoilerplate;\n\ntemplate <typename TCtx, typename TChan>\nclass ChannelImplBoilerplate : public std::enable_shared_from_this<TChan> {\n public:\n  class ConstructorToken {\n   public:\n    ConstructorToken(const ConstructorToken&) = default;\n\n   private:\n    explicit ConstructorToken() {}\n    friend ContextImplBoilerplate<TCtx, TChan>;\n  };\n\n  ChannelImplBoilerplate(\n      ConstructorToken token,\n      std::shared_ptr<TCtx> context,\n      std::string id);\n\n  ChannelImplBoilerplate(const ChannelImplBoilerplate&) = delete;\n  ChannelImplBoilerplate(ChannelImplBoilerplate&&) = delete;\n  ChannelImplBoilerplate& operator=(const ChannelImplBoilerplate&) = delete;\n  ChannelImplBoilerplate& operator=(ChannelImplBoilerplate&&) = delete;\n\n  // Initialize member fields that need `shared_from_this`.\n  void init();\n\n  // Perform a send operation.\n  void send(Buffer buffer, size_t length, TSendCallback callback);\n\n  // Queue a recv operation.\n  void recv(Buffer buffer, size_t length, TRecvCallback callback);\n\n  // Tell the connection what its identifier is.\n  void setId(std::string id);\n\n  // Shut down the connection and its resources.\n  void close();\n\n  virtual ~ChannelImplBoilerplate() = default;\n\n protected:\n  virtual void initImplFromLoop() = 0;\n  virtual void sendImplFromLoop(\n     
 uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) = 0;\n  virtual void recvImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TRecvCallback callback) = 0;\n  virtual void handleErrorImpl() = 0;\n  virtual void setIdImpl() {}\n\n  void setError(Error error);\n\n  const std::shared_ptr<TCtx> context_;\n\n  Error error_{Error::kSuccess};\n\n  // An identifier for the connection, composed of the identifier for the\n  // context or listener, combined with an increasing sequence number. It will\n  // only be used for logging and debugging purposes.\n  std::string id_;\n\n  CallbackWrapper<TChan> callbackWrapper_{*this, *this->context_};\n\n private:\n  // Initialize member fields that need `shared_from_this`.\n  void initFromLoop();\n\n  // Perform a send operation.\n  void sendFromLoop(Buffer buffer, size_t length, TSendCallback callback);\n\n  // Queue a recv operation.\n  void recvFromLoop(Buffer buffer, size_t length, TRecvCallback callback);\n\n  void setIdFromLoop(std::string id);\n\n  // Shut down the connection and its resources.\n  void closeFromLoop();\n\n  // Deal with an error.\n  void handleError();\n\n  // A sequence number for the calls to send and recv.\n  uint64_t nextTensorBeingSent_{0};\n  uint64_t nextTensorBeingReceived_{0};\n\n  // For some odd reason it seems we need to use a qualified name here...\n  template <typename T>\n  friend class tensorpipe::CallbackWrapper;\n\n  // Contexts do sometimes need to call directly into closeFromLoop, in order to\n  // make sure that some of their operations can happen \"atomically\" on the\n  // connection, without possibly other operations occurring in between (e.g.,\n  // an error).\n  friend ContextImplBoilerplate<TCtx, TChan>;\n};\n\ntemplate <typename TCtx, typename TChan>\nChannelImplBoilerplate<TCtx, TChan>::ChannelImplBoilerplate(\n    ConstructorToken /* unused */,\n    std::shared_ptr<TCtx> context,\n    
std::string id)\n    : context_(std::move(context)), id_(std::move(id)) {}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::init() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->initFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::initFromLoop() {\n  if (context_->closed()) {\n    // Set the error without calling setError because we do not want to invoke\n    // the subclass's handleErrorImpl as it would find itself in a weird state\n    // (since initFromLoop wouldn't have been called).\n    error_ = TP_CREATE_ERROR(ChannelClosedError);\n    TP_VLOG(4) << \"Channel \" << id_ << \" is closing (without initing)\";\n    return;\n  }\n\n  initImplFromLoop();\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::send(\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  context_->deferToLoop([impl{this->shared_from_this()},\n                         buffer,\n                         length,\n                         callback{std::move(callback)}]() mutable {\n    impl->sendFromLoop(buffer, length, std::move(callback));\n  });\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::sendFromLoop(\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  TP_DCHECK(context_->inLoop());\n\n  const uint64_t sequenceNumber = nextTensorBeingSent_++;\n  TP_VLOG(4) << \"Channel \" << id_ << \" received a send request (#\"\n             << sequenceNumber << \")\";\n\n  callback = [this, sequenceNumber, callback{std::move(callback)}](\n                 const Error& error) {\n    // There is no requirement for the channel to invoke callbacks in order.\n    TP_VLOG(4) << \"Channel \" << id_ << \" is calling a send callback (#\"\n               << sequenceNumber << \")\";\n    callback(error);\n    TP_VLOG(4) << \"Channel \" << id_ << \" done calling a 
send callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    callback(error_);\n    return;\n  }\n\n  sendImplFromLoop(sequenceNumber, buffer, length, std::move(callback));\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::recv(\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  context_->deferToLoop([impl{this->shared_from_this()},\n                         buffer,\n                         length,\n                         callback{std::move(callback)}]() mutable {\n    impl->recvFromLoop(buffer, length, std::move(callback));\n  });\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::recvFromLoop(\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  TP_DCHECK(context_->inLoop());\n\n  const uint64_t sequenceNumber = nextTensorBeingReceived_++;\n  TP_VLOG(4) << \"Channel \" << id_ << \" received a recv request (#\"\n             << sequenceNumber << \")\";\n\n  callback = [this, sequenceNumber, callback{std::move(callback)}](\n                 const Error& error) {\n    // There is no requirement for the channel to invoke callbacks in order.\n    TP_VLOG(4) << \"Channel \" << id_ << \" is calling a recv callback (#\"\n               << sequenceNumber << \")\";\n    callback(error);\n    TP_VLOG(4) << \"Channel \" << id_ << \" done calling a recv callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    callback(error_);\n    return;\n  }\n\n  recvImplFromLoop(sequenceNumber, buffer, length, std::move(callback));\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::setId(std::string id) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, id{std::move(id)}]() mutable {\n        impl->setIdFromLoop(std::move(id));\n      });\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, 
TChan>::setIdFromLoop(std::string id) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(4) << \"Channel \" << id_ << \" was renamed to \" << id;\n  id_ = std::move(id);\n  setIdImpl();\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::close() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->closeFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::closeFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(4) << \"Channel \" << id_ << \" is closing\";\n  setError(TP_CREATE_ERROR(ChannelClosedError));\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::setError(Error error) {\n  // Don't overwrite an error that's already set.\n  if (error_ || !error) {\n    return;\n  }\n\n  error_ = std::move(error);\n\n  handleError();\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ChannelImplBoilerplate<TCtx, TChan>::handleError() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(5) << \"Channel \" << id_ << \" is handling error \" << error_.what();\n\n  handleErrorImpl();\n}\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cma/channel_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cma/channel_impl.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\n#include <tensorpipe/channel/cma/context_impl.h>\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cma {\n\nnamespace {\n\nstruct Descriptor {\n  uint32_t pid;\n  uint64_t ptr;\n  NOP_STRUCTURE(Descriptor, pid, ptr);\n};\n\n} // namespace\n\nChannelImpl::ChannelImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::shared_ptr<transport::Connection> descriptorConnection,\n    std::shared_ptr<transport::Connection> completionConnection)\n    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      descriptorConnection_(std::move(descriptorConnection)),\n      completionConnection_(std::move(completionConnection)) {}\n\nvoid ChannelImpl::initImplFromLoop() {\n  context_->enroll(*this);\n}\n\nvoid ChannelImpl::sendImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  SendOpIter opIter = sendOps_.emplaceBack(sequenceNumber);\n  SendOperation& op = *opIter;\n  op.callback = std::move(callback);\n  op.ptr = buffer.unwrap<CpuBuffer>().ptr;\n  op.length = length;\n\n  sendOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceSendOperation(\n    SendOpIter opIter,\n    SendOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  
sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the descriptor control connection and read calls on the\n  // completion control connection.\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::READING_COMPLETION,\n      /*cond=*/!error_ && prevOpState >= SendOperation::READING_COMPLETION,\n      /*actions=*/\n      {&ChannelImpl::writeDescriptor, &ChannelImpl::readCompletion});\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::READING_COMPLETION,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/op.doneReadingCompletion,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n}\n\nvoid ChannelImpl::writeDescriptor(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  auto nopHolder = std::make_shared<NopHolder<Descriptor>>();\n  Descriptor& nopDescriptor = nopHolder->getObject();\n  // TODO: Store the PID upon channel/context instantiation.\n  nopDescriptor.pid = ::getpid();\n  nopDescriptor.ptr = reinterpret_cast<uint64_t>(op.ptr);\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing descriptor (#\"\n             << op.sequenceNumber << \")\";\n  descriptorConnection_->write(\n      *nopHolder,\n      callbackWrapper_([sequenceNumber{op.sequenceNumber},\n                        nopHolder](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing descriptor (#\"\n                   << sequenceNumber << \")\";\n      }));\n}\n\nvoid ChannelImpl::readCompletion(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading completion (#\"\n             << op.sequenceNumber << \")\";\n  
completionConnection_->read(\n      nullptr,\n      0,\n      callbackWrapper_([opIter](\n                           ChannelImpl& impl,\n                           const void* /* unused */,\n                           size_t /* unused */) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading completion (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingCompletion = true;\n        impl.sendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callSendCallback(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::recvImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  RecvOpIter opIter = recvOps_.emplaceBack(sequenceNumber);\n  RecvOperation& op = *opIter;\n  op.ptr = buffer.unwrap<CpuBuffer>().ptr;\n  op.length = length;\n  op.callback = std::move(callback);\n\n  recvOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceRecvOperation(\n    RecvOpIter opIter,\n    RecvOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of read calls on the descriptor control connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::READING_DESCRIPTOR,\n      /*cond=*/!error_ && prevOpState >= RecvOperation::READING_DESCRIPTOR,\n      /*actions=*/{&ChannelImpl::readDescriptor});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_DESCRIPTOR,\n   
   /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneReadingDescriptor,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_DESCRIPTOR,\n      /*to=*/RecvOperation::COPYING,\n      /*cond=*/!error_ && op.doneReadingDescriptor,\n      /*actions=*/{&ChannelImpl::copy});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::COPYING,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneCopying,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the completion control connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::COPYING,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/!error_ && op.doneCopying &&\n          prevOpState >= RecvOperation::FINISHED,\n      /*actions=*/\n      {&ChannelImpl::callRecvCallback, &ChannelImpl::writeCompletion});\n}\n\nvoid ChannelImpl::readDescriptor(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading descriptor (#\"\n             << op.sequenceNumber << \")\";\n  auto nopHolderIn = std::make_shared<NopHolder<Descriptor>>();\n  descriptorConnection_->read(\n      *nopHolderIn, callbackWrapper_([opIter, nopHolderIn](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading descriptor (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingDescriptor = true;\n        if (!impl.error_) {\n          Descriptor& nopDescriptor = nopHolderIn->getObject();\n          opIter->remotePid = nopDescriptor.pid;\n          opIter->remotePtr = reinterpret_cast<void*>(nopDescriptor.ptr);\n        }\n        impl.recvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::copy(RecvOpIter opIter) {\n  RecvOperation& op = 
*opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is copying payload (#\"\n             << op.sequenceNumber << \")\";\n  context_->requestCopy(\n      op.remotePid,\n      op.remotePtr,\n      op.ptr,\n      op.length,\n      callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done copying payload (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneCopying = true;\n        impl.recvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callRecvCallback(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::writeCompletion(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing completion (#\"\n             << op.sequenceNumber << \")\";\n  completionConnection_->write(\n      nullptr,\n      0,\n      callbackWrapper_([sequenceNumber{op.sequenceNumber}](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing completion (#\"\n                   << sequenceNumber << \")\";\n      }));\n}\n\nvoid ChannelImpl::handleErrorImpl() {\n  sendOps_.advanceAllOperations();\n  recvOps_.advanceAllOperations();\n\n  descriptorConnection_->close();\n  completionConnection_->close();\n\n  context_->unenroll(*this);\n}\n\n} // namespace cma\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cma/channel_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n#include <tensorpipe/common/state_machine.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cma {\n\nclass ContextImpl;\n\nstruct SendOperation {\n  enum State { UNINITIALIZED, READING_COMPLETION, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingCompletion{false};\n\n  // Arguments at creation\n  void* ptr;\n  size_t length;\n  TSendCallback callback;\n};\n\nstruct RecvOperation {\n  enum State { UNINITIALIZED, READING_DESCRIPTOR, COPYING, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingDescriptor{false};\n  bool doneCopying{false};\n\n  // Arguments at creation\n  void* ptr;\n  size_t length;\n  TRecvCallback callback;\n\n  // Other data\n  pid_t remotePid;\n  void* remotePtr;\n};\n\nclass ChannelImpl final\n    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  ChannelImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::shared_ptr<transport::Connection> descriptorConnection,\n      std::shared_ptr<transport::Connection> completionConnection);\n\n protected:\n  // Implement the entry points called by ChannelImplBoilerplate.\n  void initImplFromLoop() override;\n  void sendImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) override;\n  void recvImplFromLoop(\n      uint64_t sequenceNumber,\n    
  Buffer buffer,\n      size_t length,\n      TRecvCallback callback) override;\n  void handleErrorImpl() override;\n\n private:\n  const std::shared_ptr<transport::Connection> descriptorConnection_;\n  const std::shared_ptr<transport::Connection> completionConnection_;\n\n  OpsStateMachine<ChannelImpl, SendOperation> sendOps_{\n      *this,\n      &ChannelImpl::advanceSendOperation};\n  using SendOpIter = decltype(sendOps_)::Iter;\n  OpsStateMachine<ChannelImpl, RecvOperation> recvOps_{\n      *this,\n      &ChannelImpl::advanceRecvOperation};\n  using RecvOpIter = decltype(recvOps_)::Iter;\n\n  // State machines for send and recv ops.\n  void advanceSendOperation(\n      SendOpIter opIter,\n      SendOperation::State prevOpState);\n  void advanceRecvOperation(\n      RecvOpIter opIter,\n      RecvOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For send operations:\n  void writeDescriptor(SendOpIter opIter);\n  void readCompletion(SendOpIter opIter);\n  void callSendCallback(SendOpIter opIter);\n  // For recv operations:\n  void readDescriptor(RecvOpIter opIter);\n  void copy(RecvOpIter opIter);\n  void callRecvCallback(RecvOpIter opIter);\n  void writeCompletion(RecvOpIter opIter);\n};\n\n} // namespace cma\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cma/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cma/context_impl.h>\n\n#include <linux/prctl.h>\n#include <sys/prctl.h>\n#include <sys/syscall.h>\n#include <sys/uio.h>\n#include <unistd.h>\n\n#include <functional>\n#include <limits>\n#include <sstream>\n#include <string>\n#include <thread>\n#include <utility>\n\n#include <tensorpipe/channel/cma/channel_impl.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/strings.h>\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cma {\n\nnamespace {\n\n// Prepend descriptor with transport name so it's easy to\n// disambiguate descriptors when debugging.\nconst std::string kDomainDescriptorPrefix{\"cma:\"};\n\nError callProcessVmReadv(\n    void* localPtr,\n    void* remotePtr,\n    size_t length,\n    pid_t pid) {\n#ifdef SYS_process_vm_readv\n  struct iovec localIov {\n    .iov_base = localPtr, .iov_len = length\n  };\n  struct iovec remoteIov {\n    .iov_base = remotePtr, .iov_len = length\n  };\n  ssize_t nread = static_cast<ssize_t>(::syscall(\n      SYS_process_vm_readv,\n      pid,\n      &localIov,\n      /*liovcnt=*/static_cast<unsigned long>(1),\n      &remoteIov,\n      /*riovcnt=*/static_cast<unsigned long>(1),\n      /*flags=*/static_cast<unsigned long>(0)));\n  if (nread < 0) {\n    return TP_CREATE_ERROR(SystemError, \"process_vm_readv\", errno);\n  } else if (nread != length) {\n    return TP_CREATE_ERROR(ShortReadError, length, nread);\n  }\n  return Error::kSuccess;\n#else\n  return TP_CREATE_ERROR(SystemError, \"process_vm_readv\", ENOSYS);\n#endif\n}\n\nclass BadReadError final : public BaseError {\n public:\n  BadReadError(uint64_t expected, uint64_t actual)\n      : expected_(expected), actual_(actual) {}\n\n  
std::string what() const override {\n    std::ostringstream oss;\n    oss << \"Expected to read \" << expected_ << \", got \" << actual_;\n    return oss.str();\n  }\n\n private:\n  const uint64_t expected_;\n  const uint64_t actual_;\n};\n\n// Old versions of Docker use a default seccomp-bpf rule that blocks some\n// ptrace-related syscalls. To find this out, we attempt such a call against\n// ourselves, which is always allowed (it shortcuts all checks, including LSMs),\n// hence a failure can only come from a \"filter\" on the syscall.\n// Or, in fact, it could also happen if the kernel doesn't support the syscall.\nError attemptProcessVmReadvSyscallOnSelf() {\n  uint64_t someSourceValue = 0x0123456789abcdef;\n  uint64_t someTargetValue = 0;\n  Error error = callProcessVmReadv(\n      &someTargetValue, &someSourceValue, sizeof(uint64_t), ::getpid());\n  if (error) {\n    return error;\n  }\n  if (someTargetValue != someSourceValue) {\n    return TP_CREATE_ERROR(BadReadError, someSourceValue, someTargetValue);\n  }\n  return Error::kSuccess;\n}\n\n// According to read(2):\n// > On Linux, read() (and similar system calls) will transfer at most\n// > 0x7ffff000 (2,147,479,552) bytes, returning the number of bytes actually\n// > transferred. 
(This is true on both 32-bit and 64-bit systems.)\nconstexpr size_t kMaxBytesReadableAtOnce = 0x7ffff000;\n\nError performCopy(\n    void* localPtr,\n    void* remotePtr,\n    size_t length,\n    pid_t remotePid) {\n  for (size_t offset = 0; offset < length; offset += kMaxBytesReadableAtOnce) {\n    Error error = callProcessVmReadv(\n        reinterpret_cast<uint8_t*>(localPtr) + offset,\n        reinterpret_cast<uint8_t*>(remotePtr) + offset,\n        std::min(length - offset, kMaxBytesReadableAtOnce),\n        remotePid);\n    if (error) {\n      return error;\n    }\n  }\n  return Error::kSuccess;\n}\n\n} // namespace\n\nstd::shared_ptr<ContextImpl> ContextImpl::create() {\n  int rv;\n  std::ostringstream oss;\n  oss << kDomainDescriptorPrefix;\n\n  // This transport only works across processes on the same machine, and we\n  // detect that by computing the boot ID.\n  optional<std::string> bootID = getBootID();\n  TP_THROW_ASSERT_IF(!bootID.has_value()) << \"Unable to read boot_id\";\n  oss << bootID.value();\n\n  // An endpoint can see the other through its PID if the latter is in a child\n  // PID namespace of the former. Since the channel is bidirectional this must\n  // be symmetric and thus the PID namespaces must be the same.\n  optional<std::string> pidNsID = getLinuxNamespaceId(LinuxNamespace::kPid);\n  if (!pidNsID.has_value()) {\n    TP_VLOG(5) << \"Unable to read pid namespace ID\";\n    return nullptr;\n  }\n  oss << '_' << pidNsID.value();\n\n  // The ability to call process_vm_readv on a target is controlled by the\n  // PTRACE_MODE_ATTACH_REALCREDS check (see process_vm_readv(2)). We'll go\n  // through its checklist, step by step (which is found in ptrace(2)). 
We will\n  // ignore the CAP_SYS_PTRACE conditions (i.e., we'll assume we don't have that\n  // capability) because they are hard to check, and typically not needed.\n\n  // We'll skip the check on whether the endpoints are two threads of the same\n  // process (in which case ptrace is always allowed) because it's hard to fit\n  // it in the descriptor and because we have some other more specialized\n  // channels for that case.\n\n  // The next step involves comparing user and group IDs. If the processes are\n  // in user namespaces the kernel first maps these IDs back to the top-level\n  // (\"initial\") ones and compares those. We can't do such mapping, thus we\n  // compare the IDs as integers as we see them and thus for this to work\n  // properly we require that the two endpoints are in the same user namespace.\n  // This does not in fact constitute an extra restriction since the later\n  // commoncap/capability LSM check will need to enforce this too.\n  optional<std::string> userNsID = getLinuxNamespaceId(LinuxNamespace::kUser);\n  if (!userNsID.has_value()) {\n    TP_VLOG(5) << \"Unable to read user namespace ID\";\n    return nullptr;\n  }\n  oss << '_' << userNsID.value();\n\n  // It is required that our *real* user ID matches the real, effective and\n  // saved-set user IDs of the target. 
And the same must hold for group IDs.\n  // As the channel is bidirectional, the reverse must also hold, which means\n  // our real, effective and saved-set IDs must all be equal and must match the\n  // other endpoint's ones.\n  uid_t realUserId, effectiveUserId, savedSetUserId;\n  gid_t realGroupId, effectiveGroupId, savedSetGroupId;\n  rv = ::getresuid(&realUserId, &effectiveUserId, &savedSetUserId);\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n  rv = ::getresgid(&realGroupId, &effectiveGroupId, &savedSetGroupId);\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n  if (realUserId != effectiveUserId || realUserId != savedSetUserId ||\n      realGroupId != effectiveGroupId || realGroupId != savedSetGroupId) {\n    TP_VLOG(5) << \"User IDs or group IDs aren't all equal. User IDs are \"\n               << realUserId << \" (real), \" << effectiveUserId\n               << \" (effective) and \" << savedSetUserId\n               << \" (saved-set). Group IDs are \" << realGroupId << \" (real), \"\n               << effectiveGroupId << \" (effective) and \" << savedSetGroupId\n               << \" (saved-set).\";\n    return nullptr;\n  }\n  oss << '_' << realUserId << '_' << realGroupId;\n\n  // The target must be dumpable. Which, due to symmetry, means we must be\n  // dumpable too.\n  rv = ::prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n  // SUID_DUMP_USER has a value of 1.\n  if (rv != 1) {\n    TP_VLOG(5) << \"Process isn't dumpable\";\n    return nullptr;\n  }\n\n  // Next the Linux Security Modules (LSMs) kick in. Since users could register\n  // third-party LSMs we'll need to draw a line in what we support. We have two\n  // options with unsupported LSMs: play it safe and assume the LSM will reject\n  // the check, or \"trust\" the user and make them responsible to deal with the\n  // LSMs they added. We're leaning for the latter, as often some LSMs like\n  // AppArmor or SELinux are enabled without actually restricting anything. 
For\n  // now we'll support the LSMs that are found by default on common distros,\n  // but we can include support for more of them if that becomes necessary.\n  optional<std::vector<std::string>> lsms = getLinuxSecurityModules();\n  bool yamaOptional = false;\n  if (!lsms.has_value()) {\n    // This could happen if /sys/kernel/security/lsm cannot be opened. Although\n    // that file looks like it resides on sysfs, it's actually on the securityfs\n    // VFS, which is sometimes not bind-mounted inside containers. In such cases\n    // rather than failing hard we'll check a couple of reasonable LSMs.\n    TP_VLOG(5) << \"Couldn't detect the active Linux Security Modules\";\n    lsms.emplace();\n    *lsms = {\"capability\", \"yama\"};\n    // We don't know whether YAMA is really there, hence we'll remember to\n    // tolerate any failures later on.\n    yamaOptional = true;\n  } else {\n    TP_VLOG(5) << \"Detected these Linux Security Modules: \" << joinStrs(*lsms);\n  }\n  // FIXME Can we assume that the two endpoints will see the same list of LSMs,\n  // or should we incorporate that into the domain descriptor?\n  for (const std::string& lsm : lsms.value()) {\n    if (lsm == \"capability\") {\n      // We already checked that the endpoints are in the same user namespace.\n      // We must check they have the same permitted capabilities in it.\n      optional<std::string> caps = getPermittedCapabilitiesID();\n      TP_THROW_ASSERT_IF(!caps.has_value())\n          << \"Unable to obtain permitted capabilities\";\n      oss << '_' << caps.value();\n    } else if (lsm == \"yama\") {\n      optional<YamaPtraceScope> yamaScope = getYamaPtraceScope();\n      if (!yamaScope.has_value()) {\n        TP_THROW_ASSERT_IF(!yamaOptional)\n            << \"Unable to retrieve YAMA ptrace scope\";\n        continue;\n      }\n      switch (yamaScope.value()) {\n        case YamaPtraceScope::kClassicPtracePermissions:\n          TP_VLOG(5) << \"YAMA ptrace scope set to classic 
ptrace permissions\";\n          break;\n        case YamaPtraceScope::kRestrictedPtrace:\n          TP_VLOG(5) << \"YAMA ptrace scope set to restricted ptrace\";\n          // FIXME It's not really great to change a global property of the\n          // process, especially a security-related one. An \"excuse\" for doing\n          // so is that UCT does the same:\n          // https://github.com/openucx/ucx/blob/4d9976b6b8f8faae609c078c72aad8e5b842c43f/src/uct/sm/scopy/cma/cma_md.c#L61\n#ifndef PR_SET_PTRACER\n// https://github.com/torvalds/linux/blob/master/include/uapi/linux/prctl.h\n#define PR_SET_PTRACER 0x59616d61\n#endif\n#ifndef PR_SET_PTRACER_ANY\n// https://github.com/torvalds/linux/blob/master/include/uapi/linux/prctl.h\n#define PR_SET_PTRACER_ANY ((unsigned long)-1)\n#endif\n          rv = ::prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);\n          TP_THROW_SYSTEM_IF(rv < 0, errno);\n          break;\n        case YamaPtraceScope::kAdminOnlyAttach:\n          TP_VLOG(5) << \"YAMA ptrace scope set to admin-only attach\";\n          return nullptr;\n        case YamaPtraceScope::kNoAttach:\n          TP_VLOG(5) << \"YAMA ptrace scope set to no attach\";\n          return nullptr;\n        default:\n          TP_THROW_ASSERT() << \"Unknown YAMA ptrace scope\";\n      }\n    }\n  }\n\n  // In addition to the ptrace check, in some cases (I'm looking at you Docker)\n  // the process_vm_readv syscall is outright blocked by seccomp-bpf. 
Or just\n  // unsupported by the kernel.\n  Error error = attemptProcessVmReadvSyscallOnSelf();\n  if (error) {\n    TP_VLOG(5)\n        << \"The process_vm_readv syscall appears to be unavailable or blocked: \"\n        << error.what();\n    return nullptr;\n  }\n\n  std::string domainDescriptor = oss.str();\n  TP_VLOG(5) << \"The domain descriptor for CMA is \" << domainDescriptor;\n\n  std::unordered_map<Device, std::string> deviceDescriptors = {\n      {Device{kCpuDeviceType, 0}, std::move(domainDescriptor)}};\n\n  return std::make_shared<ContextImpl>(std::move(deviceDescriptors));\n}\n\nContextImpl::ContextImpl(\n    std::unordered_map<Device, std::string> deviceDescriptors)\n    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(\n          std::move(deviceDescriptors)) {\n  thread_ = std::thread(&ContextImpl::handleCopyRequests, this);\n}\n\nstd::shared_ptr<Channel> ContextImpl::createChannel(\n    std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint /* unused */) {\n  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());\n  return createChannelInternal(\n      std::move(connections[0]), std::move(connections[1]));\n}\n\nsize_t ContextImpl::numConnectionsNeeded() const {\n  return 2;\n}\n\nvoid ContextImpl::handleErrorImpl() {\n  requests_.push(nullopt);\n}\n\nvoid ContextImpl::joinImpl() {\n  thread_.join();\n  // TP_DCHECK(requests_.empty());\n}\n\nbool ContextImpl::inLoop() const {\n  return loop_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  loop_.deferToLoop(std::move(fn));\n};\n\nvoid ContextImpl::requestCopy(\n    pid_t remotePid,\n    void* remotePtr,\n    void* localPtr,\n    size_t length,\n    std::function<void(const Error&)> fn) {\n  uint64_t requestId = nextRequestId_++;\n  TP_VLOG(4) << \"Channel context \" << id_ << \" received a copy request (#\"\n             << requestId << \")\";\n\n  fn = [this, requestId, fn{std::move(fn)}](const Error& error) {\n    TP_VLOG(4) << \"Channel 
context \" << id_\n               << \" is calling a copy request callback (#\" << requestId << \")\";\n    fn(error);\n    TP_VLOG(4) << \"Channel context \" << id_\n               << \" done calling a copy request callback (#\" << requestId\n               << \")\";\n  };\n\n  requests_.push(\n      CopyRequest{remotePid, remotePtr, localPtr, length, std::move(fn)});\n}\n\nvoid ContextImpl::handleCopyRequests() {\n  setThreadName(\"TP_CMA_loop\");\n  while (true) {\n    auto maybeRequest = requests_.pop();\n    if (!maybeRequest.has_value()) {\n      break;\n    }\n    CopyRequest request = std::move(maybeRequest).value();\n\n    request.callback(performCopy(\n        request.localPtr,\n        request.remotePtr,\n        request.length,\n        request.remotePid));\n  }\n}\n\n} // namespace cma\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cma/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <functional>\n#include <thread>\n\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/device.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/queue.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cma {\n\nclass ChannelImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create();\n\n  explicit ContextImpl(\n      std::unordered_map<Device, std::string> deviceDescriptors);\n\n  std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint);\n\n  size_t numConnectionsNeeded() const override;\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n  using copy_request_callback_fn = std::function<void(const Error&)>;\n\n  void requestCopy(\n      pid_t remotePid,\n      void* remotePtr,\n      void* localPtr,\n      size_t length,\n      copy_request_callback_fn fn);\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n\n private:\n  OnDemandDeferredExecutor loop_;\n\n  struct CopyRequest {\n    pid_t remotePid;\n    void* remotePtr;\n    void* localPtr;\n    size_t length;\n    copy_request_callback_fn callback;\n  };\n\n  std::thread thread_;\n  Queue<optional<CopyRequest>> requests_{std::numeric_limits<int>::max()};\n\n  // This is atomic because it may be accessed from outside the 
loop.\n  std::atomic<uint64_t> nextRequestId_{0};\n\n  void handleCopyRequests();\n};\n\n} // namespace cma\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cma/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cma/factory.h>\n\n#include <tensorpipe/channel/cma/channel_impl.h>\n#include <tensorpipe/channel/cma/context_impl.h>\n#include <tensorpipe/channel/context_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cma {\n\nstd::shared_ptr<Context> create() {\n  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>();\n}\n\n} // namespace cma\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cma/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/channel/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cma {\n\nstd::shared_ptr<Context> create();\n\n} // namespace cma\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/context.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <vector>\n\n#include <tensorpipe/common/buffer.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\n\nenum class Endpoint : bool { kConnect, kListen };\n\nclass Channel;\n\n// Abstract base class for channel context classes.\n//\n// Instances of these classes are expected to be registered with a\n// context. All registered instances are assumed to be eligible\n// channels for all pairs.\n//\nclass Context {\n public:\n  // Return whether the context is able to operate correctly.\n  //\n  // Some channel types may be unable to perform as intended under some\n  // circumstances (e.g., specialized hardware unavailable, lack of\n  // permissions). They can report it through this method in order for\n  // the core context to avoid registering them in the first place.\n  //\n  virtual bool isViable() const = 0;\n\n  // Return the number of control connections needed to create an instance of\n  // this channel.\n  //\n  // Most channels require only one, but some require more (cuda_basic), and\n  // some might require none.\n  //\n  virtual size_t numConnectionsNeeded() const = 0;\n\n  // Return a map from supported devices to strings describing the device from\n  // the channel's perspective.\n  //\n  // Two processes with a channel context of the same type can leverage this\n  // channel to make two devices communicate if one side's device descriptor is\n  // \"accepted\" by the other one, using the canCommunicateWithRemote method\n  // below. 
That method must be symmetric, and unless overridden defaults to\n  // string comparison.\n  //\n  virtual const std::unordered_map<Device, std::string>& deviceDescriptors()\n      const = 0;\n\n  // Compare local and remote device descriptors for compatibility.\n  //\n  // Determine whether a channel can be opened between a local device and\n  // a remote one that has the given device descriptor. This function\n  // needs to be symmetric: if we called this method on the remote\n  // context with the local descriptor we should get the same answer.\n  // Unless overridden it defaults to string comparison.\n  //\n  virtual bool canCommunicateWithRemote(\n      const std::string& localDeviceDescriptor,\n      const std::string& remoteDeviceDescriptor) const = 0;\n\n  // Return newly created channel using the specified connections.\n  //\n  // It is up to the channel to either use these connections for further\n  // initialization, or use them directly. Either way, the returned\n  // channel should be immediately usable. If the channel isn't fully\n  // initialized yet, take care to queue these operations to execute\n  // as soon as initialization has completed.\n  //\n  virtual std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>>,\n      Endpoint) = 0;\n\n  // Tell the context what its identifier is.\n  //\n  // This is only supposed to be called from the high-level context. It will\n  // only used for logging and debugging purposes.\n  virtual void setId(std::string id) = 0;\n\n  // Put the channel context in a terminal state, in turn closing all of its\n  // channels, and release its resources. This may be done asynchronously, in\n  // background.\n  virtual void close() = 0;\n\n  // Wait for all resources to be released and all background activity to stop.\n  virtual void join() = 0;\n\n  virtual ~Context() = default;\n\n private:\n  std::string name_;\n};\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/context_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <type_traits>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace channel {\n\ntemplate <typename TCtx, typename TChan>\nclass ContextBoilerplate : public Context {\n public:\n  template <typename... Args>\n  explicit ContextBoilerplate(Args&&... args);\n\n  ContextBoilerplate(const ContextBoilerplate&) = delete;\n  ContextBoilerplate(ContextBoilerplate&&) = delete;\n  ContextBoilerplate& operator=(const ContextBoilerplate&) = delete;\n  ContextBoilerplate& operator=(ContextBoilerplate&&) = delete;\n\n  std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint) override;\n\n  size_t numConnectionsNeeded() const override;\n\n  bool isViable() const override;\n\n  const std::unordered_map<Device, std::string>& deviceDescriptors()\n      const override;\n\n  bool canCommunicateWithRemote(\n      const std::string& localDeviceDescriptor,\n      const std::string& remoteDeviceDescriptor) const override;\n\n  void setId(std::string id) override;\n\n  void close() override;\n\n  void join() override;\n\n  ~ContextBoilerplate() override;\n\n protected:\n  // The implementation is managed by a shared_ptr because each child object\n  // will also hold a shared_ptr to it. However, its lifetime is tied to the one\n  // of this public object since when the latter is destroyed the implementation\n  // is closed and joined.\n  const std::shared_ptr<TCtx> impl_;\n};\n\ntemplate <typename TCtx, typename TChan>\ntemplate <typename... 
Args>\nContextBoilerplate<TCtx, TChan>::ContextBoilerplate(Args&&... args)\n    : impl_(TCtx::create(std::forward<Args>(args)...)) {\n  static_assert(\n      std::is_base_of<ChannelImplBoilerplate<TCtx, TChan>, TChan>::value, \"\");\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->init();\n}\n\ntemplate <typename TCtx, typename TChan>\nstd::shared_ptr<Channel> ContextBoilerplate<TCtx, TChan>::createChannel(\n    std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint endpoint) {\n  if (unlikely(!impl_)) {\n    return std::make_shared<ChannelBoilerplate<TCtx, TChan>>(nullptr);\n  }\n  return impl_->createChannel(std::move(connections), endpoint);\n}\n\ntemplate <typename TCtx, typename TChan>\nsize_t ContextBoilerplate<TCtx, TChan>::numConnectionsNeeded() const {\n  if (unlikely(!impl_)) {\n    return 0;\n  }\n  return impl_->numConnectionsNeeded();\n}\n\ntemplate <typename TCtx, typename TChan>\nbool ContextBoilerplate<TCtx, TChan>::isViable() const {\n  return impl_ != nullptr;\n}\n\ntemplate <typename TCtx, typename TChan>\nconst std::unordered_map<Device, std::string>& ContextBoilerplate<TCtx, TChan>::\n    deviceDescriptors() const {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static std::unordered_map<Device, std::string> empty = {};\n    return empty;\n  }\n  return impl_->deviceDescriptors();\n}\n\ntemplate <typename TCtx, typename TChan>\nbool ContextBoilerplate<TCtx, TChan>::canCommunicateWithRemote(\n    const std::string& localDeviceDescriptor,\n    const std::string& remoteDeviceDescriptor) const {\n  if (unlikely(!impl_)) {\n    return false;\n  }\n  return impl_->canCommunicateWithRemote(\n      localDeviceDescriptor, remoteDeviceDescriptor);\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextBoilerplate<TCtx, TChan>::setId(std::string id) {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->setId(std::move(id));\n}\n\ntemplate <typename 
TCtx, typename TChan>\nvoid ContextBoilerplate<TCtx, TChan>::close() {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->close();\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextBoilerplate<TCtx, TChan>::join() {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->join();\n}\n\ntemplate <typename TCtx, typename TChan>\nContextBoilerplate<TCtx, TChan>::~ContextBoilerplate() {\n  join();\n}\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/context_impl_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <future>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <utility>\n\n#include <tensorpipe/channel/channel_boilerplate.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\n\ntemplate <typename TCtx, typename TChan>\nclass ContextImplBoilerplate : public virtual DeferredExecutor,\n                               public std::enable_shared_from_this<TCtx> {\n public:\n  explicit ContextImplBoilerplate(\n      std::unordered_map<Device, std::string> deviceDescriptors);\n\n  ContextImplBoilerplate(const ContextImplBoilerplate&) = delete;\n  ContextImplBoilerplate(ContextImplBoilerplate&&) = delete;\n  ContextImplBoilerplate& operator=(const ContextImplBoilerplate&) = delete;\n  ContextImplBoilerplate& operator=(ContextImplBoilerplate&&) = delete;\n\n  void init();\n\n  virtual size_t numConnectionsNeeded() const;\n\n  const std::unordered_map<Device, std::string>& deviceDescriptors() const;\n\n  virtual bool canCommunicateWithRemote(\n      const std::string& localDeviceDescriptor,\n      const std::string& remoteDeviceDescriptor) const;\n\n  // Enrolling dependent objects (channels) causes them to be kept alive for as\n  // long as the context exists. These objects should enroll themselves as soon\n  // as they're created (in their initImplFromLoop method) and unenroll\n  // themselves after they've completed handling an error (either right in the\n  // handleErrorImpl method or in a subsequent callback). 
The context, on the\n  // other hand, should avoid terminating (i.e., complete joining) until all\n  // objects have unenrolled themselves.\n  void enroll(TChan& channel);\n  void unenroll(TChan& channel);\n\n  // Return whether the context is in a closed state. To avoid race conditions,\n  // this must be called from within the loop.\n  bool closed();\n\n  void setId(std::string id);\n\n  void close();\n\n  void join();\n\n  virtual ~ContextImplBoilerplate() = default;\n\n protected:\n  virtual void initImplFromLoop() {}\n  virtual void handleErrorImpl() = 0;\n  virtual void joinImpl() = 0;\n  virtual void setIdImpl() {}\n\n  void setError(Error error);\n\n  template <typename... Args>\n  std::shared_ptr<Channel> createChannelInternal(Args&&... args);\n\n  Error error_{Error::kSuccess};\n\n  // An identifier for the context, composed of the identifier for the\n  // high-level context, combined with the channel's name. It will only be used\n  // for logging and debugging purposes.\n  std::string id_{\"N/A\"};\n\n  CallbackWrapper<TCtx> callbackWrapper_{*this, *this};\n\n private:\n  void initFromLoop();\n  void closeFromLoop();\n\n  void handleError();\n\n  std::atomic<bool> joined_{false};\n\n  const std::unordered_map<Device, std::string> deviceDescriptors_;\n\n  // Sequence numbers for the channels created by this context, used to create\n  // their identifiers based off this context's identifier. They will only be\n  // used for logging and debugging.\n  std::atomic<uint64_t> channelCounter_{0};\n\n  // Store shared_ptrs to dependent objects that have enrolled themselves to\n  // keep them alive. 
We use a map, indexed by raw pointers, rather than a set\n  // of shared_ptrs so that we can erase objects without them having to create\n  // a fresh shared_ptr just for that.\n  std::unordered_map<TChan*, std::shared_ptr<TChan>> channels_;\n\n  // For some odd reason it seems we need to use a qualified name here...\n  template <typename T>\n  friend class tensorpipe::CallbackWrapper;\n};\n\ntemplate <typename TCtx, typename TChan>\nContextImplBoilerplate<TCtx, TChan>::ContextImplBoilerplate(\n    std::unordered_map<Device, std::string> deviceDescriptors)\n    : deviceDescriptors_(std::move(deviceDescriptors)) {}\n\ntemplate <typename TCtx, typename TChan>\ntemplate <typename... Args>\nstd::shared_ptr<Channel> ContextImplBoilerplate<TCtx, TChan>::\n    createChannelInternal(Args&&... args) {\n  std::string channelId = id_ + \".c\" + std::to_string(channelCounter_++);\n  TP_VLOG(4) << \"Channel context \" << id_ << \" is opening channel \"\n             << channelId;\n  return std::make_shared<ChannelBoilerplate<TCtx, TChan>>(\n      typename ChannelImplBoilerplate<TCtx, TChan>::ConstructorToken(),\n      this->shared_from_this(),\n      std::move(channelId),\n      std::forward<Args>(args)...);\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::init() {\n  deferToLoop([this]() { initFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::initFromLoop() {\n  TP_DCHECK(inLoop());\n\n  TP_DCHECK(!error_);\n\n  initImplFromLoop();\n}\n\ntemplate <typename TCtx, typename TChan>\nsize_t ContextImplBoilerplate<TCtx, TChan>::numConnectionsNeeded() const {\n  return 1;\n}\n\ntemplate <typename TCtx, typename TChan>\nconst std::unordered_map<Device, std::string>& ContextImplBoilerplate<\n    TCtx,\n    TChan>::deviceDescriptors() const {\n  return deviceDescriptors_;\n}\n\ntemplate <typename TCtx, typename TChan>\nbool ContextImplBoilerplate<TCtx, TChan>::canCommunicateWithRemote(\n    
const std::string& localDeviceDescriptor,\n    const std::string& remoteDeviceDescriptor) const {\n  return localDeviceDescriptor == remoteDeviceDescriptor;\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::enroll(TChan& channel) {\n  TP_DCHECK(inLoop());\n  bool wasInserted;\n  std::tie(std::ignore, wasInserted) =\n      channels_.emplace(&channel, channel.shared_from_this());\n  TP_DCHECK(wasInserted);\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::unenroll(TChan& channel) {\n  TP_DCHECK(inLoop());\n  auto numRemoved = channels_.erase(&channel);\n  TP_DCHECK_EQ(numRemoved, 1);\n}\n\ntemplate <typename TCtx, typename TChan>\nbool ContextImplBoilerplate<TCtx, TChan>::closed() {\n  TP_DCHECK(inLoop());\n  return error_;\n};\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::setId(std::string id) {\n  TP_VLOG(4) << \"Channel context \" << id_ << \" was renamed to \" << id;\n  id_ = std::move(id);\n  setIdImpl();\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::close() {\n  deferToLoop([this]() { closeFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::closeFromLoop() {\n  TP_DCHECK(inLoop());\n  TP_VLOG(4) << \"Channel context \" << id_ << \" is closing\";\n  setError(TP_CREATE_ERROR(ContextClosedError));\n  TP_VLOG(4) << \"Channel context \" << id_ << \" done closing\";\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::setError(Error error) {\n  // Don't overwrite an error that's already set.\n  if (error_ || !error) {\n    return;\n  }\n\n  error_ = std::move(error);\n\n  handleError();\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::handleError() {\n  TP_DCHECK(inLoop());\n  TP_VLOG(5) << \"Channel context \" << id_ << \" is handling error \"\n             << 
error_.what();\n\n  // Make a copy as they could unenroll themselves inline.\n  auto channelsCopy = channels_;\n  // We call closeFromLoop, rather than just close, because we need these\n  // objects to transition _immediately_ to error, \"atomically\". If we just\n  // deferred closing to later, this could come after some already-enqueued\n  // operations that could try to access the context, which would be closed,\n  // and this could fail.\n  for (auto& iter : channelsCopy) {\n    iter.second->closeFromLoop();\n  }\n\n  handleErrorImpl();\n}\n\ntemplate <typename TCtx, typename TChan>\nvoid ContextImplBoilerplate<TCtx, TChan>::join() {\n  close();\n\n  if (!joined_.exchange(true)) {\n    TP_VLOG(4) << \"Channel context \" << id_ << \" is joining\";\n\n    // As closing is deferred to the loop, we must wait for closeImpl to be\n    // actually called before we call joinImpl, to avoid race conditions. For\n    // this, we defer another task to the loop, which we know will run after the\n    // closing, and then we wait for that task to be run.\n    std::promise<void> hasClosed;\n    deferToLoop([&]() { hasClosed.set_value(); });\n    hasClosed.get_future().wait();\n\n    joinImpl();\n\n    TP_VLOG(4) << \"Channel context \" << id_ << \" done joining\";\n\n    // FIXME This may actually not be true, as channels could for example be\n    // kept alive by the underlying transport, and thus outlive their context.\n    // TP_DCHECK(channels_.empty());\n  }\n}\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/channel_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_basic/channel_impl.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <cuda_runtime.h>\n\n#include <tensorpipe/channel/channel.h>\n#include <tensorpipe/channel/cuda_basic/constants.h>\n#include <tensorpipe/channel/cuda_basic/context_impl.h>\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_basic {\n\nnamespace {\n\nsize_t ceilOfRatio(size_t n, size_t d) {\n  return (n + d - 1) / d;\n}\n\n} // namespace\n\nChannelImpl::ChannelImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::shared_ptr<transport::Connection> connection,\n    std::shared_ptr<Channel> cpuChannel,\n    CudaLoop& cudaLoop)\n    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      connection_(std::move(connection)),\n      cpuChannel_(std::move(cpuChannel)),\n      cudaLoop_(cudaLoop) {}\n\nvoid ChannelImpl::initImplFromLoop() {\n  context_->enroll(*this);\n}\n\nvoid ChannelImpl::cudaCopy(\n    void* dst,\n    const void* src,\n    size_t length,\n    int deviceIdx,\n    cudaStream_t stream,\n    std::function<void(const Error&)> callback) {\n  {\n    CudaDeviceGuard guard(deviceIdx);\n    TP_CUDA_CHECK(cudaMemcpyAsync(dst, src, length, cudaMemcpyDefault, stream));\n  }\n\n  cudaLoop_.addCallback(deviceIdx, stream, std::move(callback));\n}\n\nvoid ChannelImpl::sendImplFromLoop(\n    uint64_t sequenceNumber,\n    
Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  if (length == 0) {\n    callback(error_);\n    return;\n  }\n\n  const Device device = buffer.device();\n  const size_t chunkLength = kSlotSize;\n  const size_t numChunks = ceilOfRatio(length, chunkLength);\n  for (size_t offset = 0; offset < length; offset += chunkLength) {\n    ChunkSendOpIter opIter = chunkSendOps_.emplaceBack(nextChunkBeingSent_++);\n    ChunkSendOperation& op = *opIter;\n    op.bufferSequenceNumber = sequenceNumber;\n    op.chunkId = offset / chunkLength;\n    op.numChunks = numChunks;\n    op.length = std::min(length - offset, chunkLength);\n    // Operations are processed in order, so we can afford to trigger the\n    // callback once the last operation is done.\n    if (op.chunkId == numChunks - 1) {\n      op.callback = std::move(callback);\n    }\n\n    if (device.type == kCpuDeviceType) {\n      op.isCpuBuffer = true;\n      op.devicePtr =\n          static_cast<uint8_t*>(buffer.unwrap<CpuBuffer>().ptr) + offset;\n    } else if (device.type == kCudaDeviceType) {\n      op.isCpuBuffer = false;\n      op.devicePtr =\n          static_cast<uint8_t*>(buffer.unwrap<CudaBuffer>().ptr) + offset;\n      op.stream = buffer.unwrap<CudaBuffer>().stream;\n      op.deviceIdx = device.index;\n    } else {\n      TP_THROW_ASSERT() << \"Unexpected device type: \" << device.type;\n    }\n\n    chunkSendOps_.advanceOperation(opIter);\n  }\n}\n\nvoid ChannelImpl::advanceChunkSendOperation(\n    ChunkSendOpIter opIter,\n    ChunkSendOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  ChunkSendOperation& op = *opIter;\n\n  // Needs to go after previous op invoked its callback because the last chunk\n  // in a series (that corresponds to one operation) must invoke its callback\n  // only when all chunks in the series are done.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::UNINITIALIZED,\n      /*to=*/ChunkSendOperation::FINISHED,\n  
    /*cond=*/error_ && prevOpState >= ChunkSendOperation::INVOKED_CALLBACK,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of send calls on CPU channel.\n  // This transition shortcuts the allocation of/copy to staging memory when the\n  // buffer is already on CPU.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::UNINITIALIZED,\n      /*to=*/ChunkSendOperation::SENDING_CPU_BUFFER,\n      /*cond=*/!error_ && op.isCpuBuffer &&\n          prevOpState >= ChunkSendOperation::SENDING_CPU_BUFFER,\n      /*actions=*/\n      {&ChannelImpl::writeReadyToSend, &ChannelImpl::sendCpuBuffer});\n\n  // Needs to go after previous op to ensure later operations are not holding\n  // staging buffers while earlier ones are still blocked waiting for them,\n  // because the staging buffer will only be returned to the allocator once the\n  // operation is destroyed, but this won't happen until earlier operations have\n  // completed, and if they are blocked waiting for buffers we may deadlock.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::UNINITIALIZED,\n      /*to=*/ChunkSendOperation::ALLOCATING_CPU_BUFFER,\n      /*cond=*/!error_ && !op.isCpuBuffer &&\n          prevOpState >= ChunkSendOperation::ALLOCATING_CPU_BUFFER,\n      /*actions=*/{&ChannelImpl::allocateSendCpuBuffer});\n\n  // See above for why this needs to go after previous op.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::ALLOCATING_CPU_BUFFER,\n      /*to=*/ChunkSendOperation::FINISHED,\n      /*cond=*/error_ && op.doneAllocatingCpuStagingBuffer &&\n          prevOpState >= ChunkSendOperation::INVOKED_CALLBACK,\n      /*actions=*/\n      {&ChannelImpl::callSendCallback, &ChannelImpl::returnSendCpuBuffer});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls 
on the control connection.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::ALLOCATING_CPU_BUFFER,\n      /*to=*/ChunkSendOperation::COPYING_FROM_GPU_TO_CPU,\n      /*cond=*/!error_ && op.doneAllocatingCpuStagingBuffer &&\n          prevOpState >= ChunkSendOperation::COPYING_FROM_GPU_TO_CPU,\n      /*actions=*/\n      {&ChannelImpl::writeReadyToSend, &ChannelImpl::copyFromGpuToCpu});\n\n  // See above for why this needs to go after previous op.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::COPYING_FROM_GPU_TO_CPU,\n      /*to=*/ChunkSendOperation::FINISHED,\n      /*cond=*/error_ && op.doneCopyingFromGpuToCpu &&\n          prevOpState >= ChunkSendOperation::INVOKED_CALLBACK,\n      /*actions=*/\n      {&ChannelImpl::callSendCallback, &ChannelImpl::returnSendCpuBuffer});\n\n  // See above for why this needs to go after previous op.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::COPYING_FROM_GPU_TO_CPU,\n      /*to=*/ChunkSendOperation::INVOKED_CALLBACK,\n      /*cond=*/!error_ && op.doneCopyingFromGpuToCpu &&\n          prevOpState >= ChunkSendOperation::INVOKED_CALLBACK,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::INVOKED_CALLBACK,\n      /*to=*/ChunkSendOperation::FINISHED,\n      /*cond=*/error_,\n      /*actions=*/{&ChannelImpl::returnSendCpuBuffer});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of send calls on CPU channel.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::INVOKED_CALLBACK,\n      /*to=*/ChunkSendOperation::SENDING_CPU_BUFFER,\n      /*cond=*/!error_ && prevOpState >= ChunkSendOperation::SENDING_CPU_BUFFER,\n      /*actions=*/{&ChannelImpl::sendCpuBuffer});\n\n  chunkSendOps_.attemptTransition(\n      opIter,\n      
/*from=*/ChunkSendOperation::SENDING_CPU_BUFFER,\n      /*to=*/ChunkSendOperation::FINISHED,\n      /*cond=*/op.doneSendingCpuBuffer && op.isCpuBuffer,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::SENDING_CPU_BUFFER,\n      /*to=*/ChunkSendOperation::FINISHED,\n      /*cond=*/op.doneSendingCpuBuffer && !op.isCpuBuffer,\n      /*actions=*/{&ChannelImpl::returnSendCpuBuffer});\n}\n\nvoid ChannelImpl::allocateSendCpuBuffer(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  TP_VLOG(5) << \"Channel \" << id_\n             << \" is allocating temporary memory for chunk #\" << op.chunkId\n             << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber;\n  Allocator& cudaHostAllocator =\n      context_->getCudaHostSendAllocator(op.deviceIdx);\n  cudaHostAllocator.alloc(\n      op.length,\n      callbackWrapper_(\n          [opIter](ChannelImpl& impl, std::shared_ptr<uint8_t> tmpBuffer) {\n            TP_VLOG(5) << \"Channel \" << impl.id_\n                       << \" is done allocating temporary memory for chunk #\"\n                       << opIter->chunkId << \" of \" << opIter->numChunks\n                       << \" for buffer #\" << opIter->bufferSequenceNumber;\n            opIter->doneAllocatingCpuStagingBuffer = true;\n            if (!impl.error_) {\n              opIter->tmpBuffer = std::move(tmpBuffer);\n            }\n            impl.chunkSendOps_.advanceOperation(opIter);\n          }));\n}\n\nvoid ChannelImpl::writeReadyToSend(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_\n             << \" is sending ready-to-send notification for chunk #\"\n             << op.chunkId << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber;\n  connection_->write(\n      nullptr,\n      0,\n      
callbackWrapper_([bufferSequenceNumber{op.bufferSequenceNumber},\n                        chunkId{op.chunkId},\n                        numChunks{op.numChunks}](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" is done sending ready-to-send notification for chunk #\"\n                   << chunkId << \" of \" << numChunks << \" for buffer #\"\n                   << bufferSequenceNumber;\n      }));\n}\n\nvoid ChannelImpl::copyFromGpuToCpu(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  TP_VLOG(5) << \"Channel \" << id_ << \" is copying chunk #\" << op.chunkId\n             << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber << \" from CUDA device to CPU\";\n  cudaCopy(\n      op.tmpBuffer.get(),\n      op.devicePtr,\n      op.length,\n      op.deviceIdx,\n      op.stream,\n      callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(5) << \"Channel \" << impl.id_ << \" is done copying chunk #\"\n                   << opIter->chunkId << \" of \" << opIter->numChunks\n                   << \" for buffer #\" << opIter->bufferSequenceNumber\n                   << \" from CUDA device to CPU\";\n        opIter->doneCopyingFromGpuToCpu = true;\n        impl.chunkSendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::sendCpuBuffer(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is sending chunk #\" << op.chunkId\n             << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber << \" through CPU channel\";\n\n  cpuChannel_->send(\n      CpuBuffer{.ptr = op.isCpuBuffer ? 
op.devicePtr : op.tmpBuffer.get()},\n      op.length,\n      callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" is done sending chunk #\"\n                   << opIter->chunkId << \" of \" << opIter->numChunks\n                   << \" for buffer #\" << opIter->bufferSequenceNumber\n                   << \" through CPU channel\";\n        opIter->doneSendingCpuBuffer = true;\n        impl.chunkSendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callSendCallback(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  if (op.callback) {\n    op.callback(error_);\n    // Reset callback to release the resources it was holding.\n    op.callback = nullptr;\n  }\n}\n\nvoid ChannelImpl::returnSendCpuBuffer(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  // The pointer's deleter will return the buffer to the allocator.\n  op.tmpBuffer = nullptr;\n}\n\nvoid ChannelImpl::recvImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  if (length == 0) {\n    callback(error_);\n    return;\n  }\n\n  const Device device = buffer.device();\n  const size_t chunkLength = kSlotSize;\n  const size_t numChunks = ceilOfRatio(length, chunkLength);\n  for (size_t offset = 0; offset < length; offset += chunkLength) {\n    ChunkRecvOpIter opIter =\n        chunkRecvOps_.emplaceBack(nextChunkBeingReceived_++);\n    ChunkRecvOperation& op = *opIter;\n    op.bufferSequenceNumber = sequenceNumber;\n    op.chunkId = offset / chunkLength;\n    op.numChunks = numChunks;\n    op.length = std::min(length - offset, chunkLength);\n    // Operations are processed in order, so we can afford to trigger the\n    // callback once the last operation is done.\n    if (op.chunkId == numChunks - 1) {\n      op.callback = std::move(callback);\n    }\n\n    if (device.type == kCpuDeviceType) {\n      op.isCpuBuffer = true;\n      op.devicePtr =\n          
static_cast<uint8_t*>(buffer.unwrap<CpuBuffer>().ptr) + offset;\n    } else if (device.type == kCudaDeviceType) {\n      op.isCpuBuffer = false;\n      op.devicePtr =\n          static_cast<uint8_t*>(buffer.unwrap<CudaBuffer>().ptr) + offset;\n      op.stream = buffer.unwrap<CudaBuffer>().stream;\n      op.deviceIdx = device.index;\n    } else {\n      TP_THROW_ASSERT() << \"Unexpected device type: \" << device.type;\n    }\n\n    chunkRecvOps_.advanceOperation(opIter);\n  }\n}\n\nvoid ChannelImpl::advanceChunkRecvOperation(\n    ChunkRecvOpIter opIter,\n    ChunkRecvOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  ChunkRecvOperation& op = *opIter;\n\n  // Needs to go after previous op invoked its callback because the last chunk\n  // in a series (that corresponds to one operation) must invoke its callback\n  // only when all chunks in the series are done.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::UNINITIALIZED,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/error_ &&\n          prevOpState >=\n              ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU_AND_INVOKED_CALLBACK,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of read calls on control connection.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::UNINITIALIZED,\n      /*to=*/ChunkRecvOperation::READING_READY_TO_SEND,\n      /*cond=*/!error_ &&\n          prevOpState >= ChunkRecvOperation::READING_READY_TO_SEND,\n      /*actions=*/{&ChannelImpl::readReadyToSend});\n\n  // See above for why this needs to go after previous op.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::READING_READY_TO_SEND,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneReadingReadyToSend &&\n          prevOpState >=\n              
ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU_AND_INVOKED_CALLBACK,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of recv calls on CPU channel.\n  // This operation shortcuts allocating staging memory when receiving directly\n  // on CPU.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::READING_READY_TO_SEND,\n      /*to=*/ChunkRecvOperation::RECEIVING_CPU_BUFFER,\n      /*cond=*/!error_ && op.doneReadingReadyToSend && op.isCpuBuffer &&\n          prevOpState >= ChunkRecvOperation::RECEIVING_CPU_BUFFER,\n      /*actions=*/{&ChannelImpl::receiveCpuBuffer});\n\n  // Needs to go after previous op to ensure later operations are not holding\n  // staging buffers while earlier ones are still blocked waiting for them,\n  // because the staging buffer will only be returned to the allocator once the\n  // operation is destroyed, but this won't happen until earlier operations have\n  // completed, and if they are blocked waiting for buffers we may deadlock.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::READING_READY_TO_SEND,\n      /*to=*/ChunkRecvOperation::ALLOCATING_CPU_BUFFER,\n      /*cond=*/!error_ && op.doneReadingReadyToSend && !op.isCpuBuffer &&\n          prevOpState >= ChunkRecvOperation::ALLOCATING_CPU_BUFFER,\n      /*actions=*/{&ChannelImpl::allocateRecvCpuBuffer});\n\n  // See above for why this needs to go after previous op.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::ALLOCATING_CPU_BUFFER,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneAllocatingCpuStagingBuffer &&\n          prevOpState >=\n              ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU_AND_INVOKED_CALLBACK,\n      /*actions=*/\n      {&ChannelImpl::callRecvCallback, &ChannelImpl::returnRecvCpuBuffer});\n\n  // Needs to go after previous op to ensure 
predictable and consistent ordering\n  // of recv calls on CPU channel.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::ALLOCATING_CPU_BUFFER,\n      /*to=*/ChunkRecvOperation::RECEIVING_CPU_BUFFER,\n      /*cond=*/!error_ && op.doneAllocatingCpuStagingBuffer &&\n          prevOpState >= ChunkRecvOperation::RECEIVING_CPU_BUFFER,\n      /*actions=*/{&ChannelImpl::receiveCpuBuffer});\n\n  // See above for why this needs to go after previous op.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::RECEIVING_CPU_BUFFER,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneReceivingCpuBuffer && !op.isCpuBuffer &&\n          prevOpState >=\n              ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU_AND_INVOKED_CALLBACK,\n      /*actions=*/\n      {&ChannelImpl::callRecvCallback, &ChannelImpl::returnRecvCpuBuffer});\n\n  // This transition shortcuts the copy to GPU when receiving on CPU memory.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::RECEIVING_CPU_BUFFER,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/op.doneReceivingCpuBuffer && op.isCpuBuffer,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::RECEIVING_CPU_BUFFER,\n      /*to=*/ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU,\n      /*cond=*/!error_ && op.doneReceivingCpuBuffer && !op.isCpuBuffer,\n      /*actions=*/{&ChannelImpl::copyFromCpuToGpu});\n\n  // See above for why this needs to go after previous op.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU,\n      /*to=*/ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU_AND_INVOKED_CALLBACK,\n      /*cond=*/prevOpState >=\n          ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU_AND_INVOKED_CALLBACK,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  
chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::COPYING_FROM_CPU_TO_GPU_AND_INVOKED_CALLBACK,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/op.doneCopyingFromCpuToGpu,\n      /*actions=*/{&ChannelImpl::returnRecvCpuBuffer});\n}\n\nvoid ChannelImpl::readReadyToSend(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_\n             << \" is reading ready-to-send notification for chunk #\"\n             << op.chunkId << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber;\n  connection_->read(callbackWrapper_(\n      [opIter](\n          ChannelImpl& impl, const void* /* unused */, size_t /* unused */) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" is done reading ready-to-send notification for chunk #\"\n                   << opIter->chunkId << \" of \" << opIter->numChunks\n                   << \" for buffer #\" << opIter->bufferSequenceNumber;\n        opIter->doneReadingReadyToSend = true;\n        impl.chunkRecvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::allocateRecvCpuBuffer(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  TP_VLOG(5) << \"Channel \" << id_\n             << \" is allocating temporary memory for chunk #\" << op.chunkId\n             << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber;\n  Allocator& cudaHostAllocator =\n      context_->getCudaHostRecvAllocator(op.deviceIdx);\n  cudaHostAllocator.alloc(\n      op.length,\n      callbackWrapper_(\n          [opIter](\n              ChannelImpl& impl, std::shared_ptr<uint8_t> tmpBuffer) mutable {\n            TP_VLOG(5) << \"Channel \" << impl.id_\n                       << \" is done allocating temporary memory for chunk #\"\n                       << opIter->chunkId << \" of \" << opIter->numChunks\n                       << \" for buffer #\" << 
opIter->bufferSequenceNumber;\n            opIter->doneAllocatingCpuStagingBuffer = true;\n            if (!impl.error_) {\n              opIter->tmpBuffer = std::move(tmpBuffer);\n            }\n            impl.chunkRecvOps_.advanceOperation(opIter);\n          }));\n}\n\nvoid ChannelImpl::receiveCpuBuffer(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is sending chunk #\" << op.chunkId\n             << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber << \" through CPU channel\";\n  cpuChannel_->recv(\n      CpuBuffer{.ptr = op.isCpuBuffer ? op.devicePtr : op.tmpBuffer.get()},\n      op.length,\n      callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" is done sending chunk #\"\n                   << opIter->chunkId << \" of \" << opIter->numChunks\n                   << \" for buffer #\" << opIter->bufferSequenceNumber\n                   << \" through CPU channel\";\n        opIter->doneReceivingCpuBuffer = true;\n        impl.chunkRecvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::copyFromCpuToGpu(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  TP_VLOG(5) << \"Channel \" << id_ << \" is copying chunk #\" << op.chunkId\n             << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber << \" from CPU to CUDA device\";\n  cudaCopy(\n      op.devicePtr,\n      op.tmpBuffer.get(),\n      op.length,\n      op.deviceIdx,\n      op.stream,\n      callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(5) << \"Channel \" << impl.id_ << \" is done copying chunk #\"\n                   << opIter->chunkId << \" of \" << opIter->numChunks\n                   << \" for buffer #\" << opIter->bufferSequenceNumber\n                   << \" from CPU to CUDA device\";\n        opIter->doneCopyingFromCpuToGpu = true;\n        
impl.chunkRecvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callRecvCallback(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  if (op.callback) {\n    op.callback(error_);\n    // Reset callback to release the resources it was holding.\n    op.callback = nullptr;\n  }\n}\n\nvoid ChannelImpl::returnRecvCpuBuffer(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  // The pointer's deleter will return the buffer to the allocator.\n  op.tmpBuffer = nullptr;\n}\n\nvoid ChannelImpl::setIdImpl() {\n  cpuChannel_->setId(id_ + \".cpu\");\n}\n\nvoid ChannelImpl::handleErrorImpl() {\n  chunkSendOps_.advanceAllOperations();\n  chunkRecvOps_.advanceAllOperations();\n\n  connection_->close();\n  cpuChannel_->close();\n\n  context_->unenroll(*this);\n}\n\n} // namespace cuda_basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/channel_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <deque>\n#include <memory>\n#include <string>\n\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n#include <tensorpipe/common/allocator.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/cuda_loop.h>\n#include <tensorpipe/common/state_machine.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_basic {\n\nclass ContextImpl;\n\nstruct ChunkSendOperation {\n  enum State {\n    UNINITIALIZED,\n    ALLOCATING_CPU_BUFFER,\n    COPYING_FROM_GPU_TO_CPU,\n    INVOKED_CALLBACK,\n    SENDING_CPU_BUFFER,\n    FINISHED\n  };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Arguments at creation\n  uint64_t bufferSequenceNumber{0};\n  bool isCpuBuffer{false};\n  void* devicePtr{nullptr};\n  size_t chunkId{0};\n  size_t numChunks{0};\n  size_t length{0};\n  std::function<void(const Error&)> callback;\n\n  // For CUDA buffers\n  cudaStream_t stream{cudaStreamDefault};\n  int deviceIdx{0};\n\n  // Data collected during processing\n  std::shared_ptr<uint8_t> tmpBuffer;\n\n  // Progress flags\n  bool doneAllocatingCpuStagingBuffer{false};\n  bool doneCopyingFromGpuToCpu{false};\n  bool doneSendingCpuBuffer{false};\n};\n\nstruct ChunkRecvOperation {\n  enum State {\n    UNINITIALIZED,\n    READING_READY_TO_SEND,\n    ALLOCATING_CPU_BUFFER,\n    RECEIVING_CPU_BUFFER,\n    COPYING_FROM_CPU_TO_GPU,\n    COPYING_FROM_CPU_TO_GPU_AND_INVOKED_CALLBACK,\n    FINISHED\n  };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Arguments at creation\n  uint64_t bufferSequenceNumber{0};\n  bool isCpuBuffer{false};\n  void* 
devicePtr{nullptr};\n  size_t chunkId{0};\n  size_t numChunks{0};\n  size_t length{0};\n  std::function<void(const Error&)> callback;\n\n  // For CUDA buffers\n  cudaStream_t stream{cudaStreamDefault};\n  int deviceIdx{0};\n\n  // Data collected during processing\n  std::shared_ptr<uint8_t> tmpBuffer;\n\n  // Progress flags\n  bool doneReadingReadyToSend{false};\n  bool doneAllocatingCpuStagingBuffer{false};\n  bool doneReceivingCpuBuffer{false};\n  bool doneCopyingFromCpuToGpu{false};\n};\n\nclass ChannelImpl final\n    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  ChannelImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::shared_ptr<transport::Connection> connection,\n      std::shared_ptr<Channel> cpuChannel,\n      CudaLoop& cudaLoop);\n\n protected:\n  // Implement the entry points called by ChannelImplBoilerplate.\n  void initImplFromLoop() override;\n  void sendImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) override;\n  void recvImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TRecvCallback callback) override;\n  void handleErrorImpl() override;\n  void setIdImpl() override;\n\n private:\n  const std::shared_ptr<transport::Connection> connection_;\n  const std::shared_ptr<Channel> cpuChannel_;\n  CudaLoop& cudaLoop_;\n\n  // A sequence number for the chunks.\n  uint64_t nextChunkBeingSent_{0};\n  uint64_t nextChunkBeingReceived_{0};\n\n  OpsStateMachine<ChannelImpl, ChunkSendOperation> chunkSendOps_{\n      *this,\n      &ChannelImpl::advanceChunkSendOperation};\n  using ChunkSendOpIter = decltype(chunkSendOps_)::Iter;\n  OpsStateMachine<ChannelImpl, ChunkRecvOperation> chunkRecvOps_{\n      *this,\n      &ChannelImpl::advanceChunkRecvOperation};\n  using ChunkRecvOpIter = decltype(chunkRecvOps_)::Iter;\n\n  // State machines for send and recv 
ops.\n  void advanceChunkSendOperation(\n      ChunkSendOpIter opIter,\n      ChunkSendOperation::State prevOpState);\n  void advanceChunkRecvOperation(\n      ChunkRecvOpIter opIter,\n      ChunkRecvOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For send operations:\n  void allocateSendCpuBuffer(ChunkSendOpIter opIter);\n  void copyFromGpuToCpu(ChunkSendOpIter opIter);\n  void callSendCallback(ChunkSendOpIter opIter);\n  void sendCpuBuffer(ChunkSendOpIter opIter);\n  void writeReadyToSend(ChunkSendOpIter opIter);\n  void returnSendCpuBuffer(ChunkSendOpIter opIter);\n  // For recv operations:\n  void readReadyToSend(ChunkRecvOpIter opIter);\n  void allocateRecvCpuBuffer(ChunkRecvOpIter opIter);\n  void receiveCpuBuffer(ChunkRecvOpIter opIter);\n  void copyFromCpuToGpu(ChunkRecvOpIter opIter);\n  void callRecvCallback(ChunkRecvOpIter opIter);\n  void returnRecvCpuBuffer(ChunkRecvOpIter opIter);\n\n  void cudaCopy(\n      void* dst,\n      const void* src,\n      size_t length,\n      int deviceIdx,\n      cudaStream_t stream,\n      std::function<void(const Error&)> callback);\n};\n\n} // namespace cuda_basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/constants.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_basic {\n\n// FIXME Avoid this anonymous namespace and use inline variables in C++-17.\nnamespace {\n\n// Define all three (redundant) values to make them explicit and avoid\n// misunderstandings due to miscalculations.\nstatic constexpr size_t kStagingAreaSize = 16 * 1024 * 1024;\nstatic constexpr size_t kSlotSize = 1024 * 1024;\nstatic constexpr size_t kNumSlots = 16;\n\nstatic_assert(kStagingAreaSize == kSlotSize * kNumSlots, \"\");\n\n} // namespace\n\n} // namespace cuda_basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_basic/context_impl.h>\n\n#include <functional>\n#include <memory>\n#include <utility>\n\n#include <tensorpipe/channel/cuda_basic/channel_impl.h>\n#include <tensorpipe/channel/cuda_basic/constants.h>\n#include <tensorpipe/channel/helpers.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/nop.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_basic {\n\nnamespace {\n\nstruct DeviceDescriptor {\n  std::string deviceType;\n  std::string descriptor;\n  NOP_STRUCTURE(DeviceDescriptor, deviceType, descriptor);\n};\n\nDeviceDescriptor deserializeDeviceDescriptor(\n    const std::string& deviceDescriptor) {\n  NopHolder<DeviceDescriptor> nopHolder;\n  loadDescriptor(nopHolder, deviceDescriptor);\n  return std::move(nopHolder.getObject());\n}\n\n} // namespace\n\nstd::shared_ptr<ContextImpl> ContextImpl::create(\n    std::shared_ptr<Context> cpuContext) {\n  Error error;\n  CudaLib cudaLib;\n  std::tie(error, cudaLib) = CudaLib::create();\n  if (error) {\n    TP_VLOG(5)\n        << \"CUDA basic channel is not viable because libcuda could not be loaded: \"\n        << error.what();\n    return nullptr;\n  }\n\n  if (cpuContext->deviceDescriptors().count(Device{kCpuDeviceType, 0}) == 0) {\n    TP_THROW_ASSERT() << \"CUDA basic channel needs a CPU channel\";\n\n    return nullptr;\n  }\n\n  if (!cpuContext->isViable()) {\n    return nullptr;\n  }\n\n  std::unordered_map<Device, std::string> deviceDescriptors;\n  // NOTE: Assume there is only one CPU.\n  TP_DCHECK_EQ(\n      cpuContext->deviceDescriptors().count(Device{kCpuDeviceType, 0}), 1);\n  const auto cpuDeviceDescriptor =\n      cpuContext->deviceDescriptors().begin()->second;\n\n  NopHolder<DeviceDescriptor> 
nopHolder;\n  DeviceDescriptor& deviceDescriptor = nopHolder.getObject();\n  deviceDescriptor.descriptor = cpuDeviceDescriptor;\n\n  deviceDescriptor.deviceType = kCpuDeviceType;\n  deviceDescriptors[Device{kCpuDeviceType, 0}] = saveDescriptor(nopHolder);\n  for (const auto& device : getCudaDevices(cudaLib)) {\n    deviceDescriptor.deviceType = kCudaDeviceType;\n    deviceDescriptors[device] = saveDescriptor(nopHolder);\n  }\n\n  return std::make_shared<ContextImpl>(\n      std::move(cudaLib), std::move(cpuContext), std::move(deviceDescriptors));\n}\n\nContextImpl::ContextImpl(\n    CudaLib cudaLib,\n    std::shared_ptr<Context> cpuContext,\n    std::unordered_map<Device, std::string> deviceDescriptors)\n    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(\n          std::move(deviceDescriptors)),\n      cudaLib_(std::move(cudaLib)),\n      cpuContext_(std::move(cpuContext)) {}\n\nstd::shared_ptr<Channel> ContextImpl::createChannel(\n    std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint endpoint) {\n  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());\n  auto conn = std::move(connections.back());\n  connections.pop_back();\n  auto cpuChannel =\n      cpuContext_->createChannel(std::move(connections), endpoint);\n  return createChannelInternal(\n      std::move(conn), std::move(cpuChannel), cudaLoop_);\n}\n\nsize_t ContextImpl::numConnectionsNeeded() const {\n  return 1 + cpuContext_->numConnectionsNeeded();\n}\n\nbool ContextImpl::canCommunicateWithRemote(\n    const std::string& localDeviceDescriptor,\n    const std::string& remoteDeviceDescriptor) const {\n  DeviceDescriptor nopLocalDeviceDescriptor =\n      deserializeDeviceDescriptor(localDeviceDescriptor);\n  DeviceDescriptor nopRemoteDeviceDescriptor =\n      deserializeDeviceDescriptor(remoteDeviceDescriptor);\n\n  // Prevent CudaBasic from being mistakenly used for CPU to CPU transfers, as\n  // there are always better options.\n  if 
(nopLocalDeviceDescriptor.deviceType == kCpuDeviceType &&\n      nopRemoteDeviceDescriptor.deviceType == kCpuDeviceType) {\n    return false;\n  }\n\n  return nopLocalDeviceDescriptor.descriptor ==\n      nopRemoteDeviceDescriptor.descriptor;\n}\n\nconst CudaLib& ContextImpl::getCudaLib() {\n  return cudaLib_;\n}\n\nAllocator& ContextImpl::getCudaHostSendAllocator(int deviceIdx) {\n  if (!cudaHostSendAllocator_.has_value()) {\n    CudaPinnedBuffer buffer = makeCudaPinnedBuffer(kStagingAreaSize, deviceIdx);\n    uint8_t* ptr = buffer.get();\n    cudaHostSendAllocator_.emplace(CudaHostAllocator{\n        std::move(buffer), Allocator(ptr, kNumSlots, kSlotSize)});\n  }\n\n  return cudaHostSendAllocator_->allocator;\n}\n\nAllocator& ContextImpl::getCudaHostRecvAllocator(int deviceIdx) {\n  if (!cudaHostRecvAllocator_.has_value()) {\n    CudaPinnedBuffer buffer = makeCudaPinnedBuffer(kStagingAreaSize, deviceIdx);\n    uint8_t* ptr = buffer.get();\n    cudaHostRecvAllocator_.emplace(CudaHostAllocator{\n        std::move(buffer), Allocator(ptr, kNumSlots, kSlotSize)});\n  }\n\n  return cudaHostRecvAllocator_->allocator;\n}\n\nvoid ContextImpl::handleErrorImpl() {\n  if (cpuContext_ != nullptr) {\n    cpuContext_->close();\n  }\n  cudaLoop_.close();\n\n  if (cudaHostSendAllocator_.has_value()) {\n    cudaHostSendAllocator_->allocator.close();\n  }\n  if (cudaHostRecvAllocator_.has_value()) {\n    cudaHostRecvAllocator_->allocator.close();\n  }\n}\n\nvoid ContextImpl::joinImpl() {\n  if (cpuContext_ != nullptr) {\n    cpuContext_->join();\n  }\n  cudaLoop_.join();\n}\n\nbool ContextImpl::inLoop() const {\n  return loop_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  loop_.deferToLoop(std::move(fn));\n};\n\nvoid ContextImpl::setIdImpl() {\n  cpuContext_->setId(id_ + \".cpu\");\n}\n\n} // namespace cuda_basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n#include <tensorpipe/common/allocator.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/cuda_lib.h>\n#include <tensorpipe/common/cuda_loop.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/device.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_basic {\n\nclass ChannelImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create(\n      std::shared_ptr<Context> cpuContext);\n\n  ContextImpl(\n      CudaLib cudaLib,\n      std::shared_ptr<Context> cpuContext,\n      std::unordered_map<Device, std::string> deviceDescriptors);\n\n  std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint);\n\n  size_t numConnectionsNeeded() const override;\n\n  bool canCommunicateWithRemote(\n      const std::string& localDeviceDescriptor,\n      const std::string& remoteDeviceDescriptor) const override;\n\n  const CudaLib& getCudaLib();\n  Allocator& getCudaHostSendAllocator(int deviceIdx);\n  Allocator& getCudaHostRecvAllocator(int deviceIdx);\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n  void setIdImpl() override;\n\n private:\n  OnDemandDeferredExecutor loop_;\n\n  const CudaLib cudaLib_;\n\n  const 
std::shared_ptr<Context> cpuContext_;\n  // TODO: Lazy initialization of cuda loop.\n  CudaLoop cudaLoop_;\n\n  struct CudaHostAllocator {\n    CudaPinnedBuffer buffer;\n    Allocator allocator;\n  };\n  optional<CudaHostAllocator> cudaHostSendAllocator_;\n  optional<CudaHostAllocator> cudaHostRecvAllocator_;\n};\n\n} // namespace cuda_basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_basic/factory.h>\n\n#include <tensorpipe/channel/context_boilerplate.h>\n#include <tensorpipe/channel/cuda_basic/channel_impl.h>\n#include <tensorpipe/channel/cuda_basic/context_impl.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_basic {\n\nstd::shared_ptr<Context> create(std::shared_ptr<Context> cpuContext) {\n  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>(\n      std::move(cpuContext));\n}\n\n} // namespace cuda_basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_basic/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/channel/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_basic {\n\nstd::shared_ptr<Context> create(std::shared_ptr<Context> cpuContext);\n\n} // namespace cuda_basic\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/channel_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_gdr/channel_impl.h>\n\n#include <algorithm>\n#include <cstring>\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/channel/cuda_gdr/context_impl.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_gdr {\n\nnamespace {\n\nsize_t ceilOfRatio(size_t n, size_t d) {\n  return (n + d - 1) / d;\n}\n\n} // namespace\n\nChannelImpl::ChannelImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::shared_ptr<transport::Connection> descriptorConnection,\n    std::shared_ptr<transport::Connection> readyToReceiveConnection)\n    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      descriptorConnection_(std::move(descriptorConnection)),\n      readyToReceiveConnection_(std::move(readyToReceiveConnection)) {}\n\nvoid ChannelImpl::initImplFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, INITIALIZING);\n  TP_DCHECK(!error_);\n\n  context_->enroll(*this);\n\n  localGpuToNic_ = context_->getGpuToNicMapping();\n  numLocalNics_ =\n      *std::max_element(localGpuToNic_.begin(), localGpuToNic_.end()) + 1;\n\n  auto nopHolderOut = std::make_shared<NopHolder<HandshakeNumNics>>();\n  HandshakeNumNics& nopHandshakeNumNics = nopHolderOut->getObject();\n  nopHandshakeNumNics.numNics = numLocalNics_;\n  TP_VLOG(6) << \"Channel \" << id_\n             << \" is writing nop object (handshake num NICs)\";\n  readyToReceiveConnection_->write(\n     
 *nopHolderOut, callbackWrapper_([nopHolderOut](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done writing nop object (handshake num NICs)\";\n      }));\n\n  auto nopHolderIn = std::make_shared<NopHolder<HandshakeNumNics>>();\n  TP_VLOG(6) << \"Channel \" << id_\n             << \" is reading nop object (handshake num NICs)\";\n  readyToReceiveConnection_->read(\n      *nopHolderIn, callbackWrapper_([nopHolderIn](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done reading nop object (handshake num NICs)\";\n        if (!impl.error_) {\n          impl.onReadHandshakeNumNics(nopHolderIn->getObject());\n        }\n      }));\n\n  state_ = WAITING_FOR_HANDSHAKE_NUM_NICS;\n}\n\nvoid ChannelImpl::onReadHandshakeNumNics(\n    const HandshakeNumNics& nopHandshakeNumNics) {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, WAITING_FOR_HANDSHAKE_NUM_NICS);\n  TP_DCHECK(!error_);\n\n  numRemoteNics_ = nopHandshakeNumNics.numNics;\n\n  std::vector<std::vector<NopIbvSetupInformation>> allSetupInfo;\n\n  queuePairs_.resize(numLocalNics_);\n  allSetupInfo.resize(numLocalNics_);\n  for (size_t localNicIdx = 0; localNicIdx < numLocalNics_; localNicIdx++) {\n    queuePairs_[localNicIdx].resize(numRemoteNics_);\n    allSetupInfo[localNicIdx].resize(numRemoteNics_);\n    IbvNic& localNic = context_->getIbvNic(localNicIdx);\n    for (size_t remoteNicIdx = 0; remoteNicIdx < numRemoteNics_;\n         remoteNicIdx++) {\n      IbvLib::qp_init_attr initAttr;\n      std::memset(&initAttr, 0, sizeof(initAttr));\n      initAttr.qp_type = IbvLib::QPT_RC;\n      initAttr.send_cq = localNic.getIbvCq().get();\n      initAttr.recv_cq = localNic.getIbvCq().get();\n      initAttr.cap.max_send_wr = kNumSends;\n      initAttr.cap.max_send_sge = 1;\n      initAttr.cap.max_recv_wr = kNumRecvs;\n      initAttr.cap.max_recv_sge = 1;\n      initAttr.sq_sig_all = 1;\n      IbvQueuePair qp = 
createIbvQueuePair(\n          context_->getIbvLib(), localNic.getIbvPd(), initAttr);\n\n      transitionIbvQueuePairToInit(\n          context_->getIbvLib(), qp, localNic.getIbvAddress());\n\n      IbvSetupInformation setupInfo =\n          makeIbvSetupInformation(localNic.getIbvAddress(), qp);\n\n      // The maximum message size will be filled in later.\n      queuePairs_[localNicIdx][remoteNicIdx] =\n          QueuePair{std::move(qp), /*maximumMessageSize=*/0};\n      allSetupInfo[localNicIdx][remoteNicIdx].fromIbvSetupInformation(\n          setupInfo);\n    }\n  }\n\n  auto nopHolderOut = std::make_shared<NopHolder<HandshakeSetupInfo>>();\n  HandshakeSetupInfo& nopHandshakeSetupInfo = nopHolderOut->getObject();\n  nopHandshakeSetupInfo.setupInfo = std::move(allSetupInfo);\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing nop object (handshake two)\";\n  readyToReceiveConnection_->write(\n      *nopHolderOut, callbackWrapper_([nopHolderOut](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done writing nop object (handshake two)\";\n      }));\n\n  auto nopHolderIn = std::make_shared<NopHolder<HandshakeSetupInfo>>();\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading nop object (handshake two)\";\n  readyToReceiveConnection_->read(\n      *nopHolderIn, callbackWrapper_([nopHolderIn](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done reading nop object (handshake two)\";\n        if (!impl.error_) {\n          impl.onReadHandshakeSetupInfo(nopHolderIn->getObject());\n        }\n      }));\n\n  state_ = WAITING_FOR_HANDSHAKE_SETUP_INFO;\n}\n\nvoid ChannelImpl::onReadHandshakeSetupInfo(\n    const HandshakeSetupInfo& nopHandshakeSetupInfo) {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, WAITING_FOR_HANDSHAKE_SETUP_INFO);\n  TP_DCHECK(!error_);\n\n  const std::vector<std::vector<NopIbvSetupInformation>>& remoteSetupInfo =\n      
nopHandshakeSetupInfo.setupInfo;\n\n  TP_DCHECK_EQ(remoteSetupInfo.size(), numRemoteNics_);\n  for (size_t remoteNicIdx = 0; remoteNicIdx < numRemoteNics_; remoteNicIdx++) {\n    TP_DCHECK_EQ(remoteSetupInfo[remoteNicIdx].size(), numLocalNics_);\n    for (size_t localNicIdx = 0; localNicIdx < numLocalNics_; localNicIdx++) {\n      IbvNic& localNic = context_->getIbvNic(localNicIdx);\n      IbvSetupInformation setupInfo =\n          remoteSetupInfo[remoteNicIdx][localNicIdx].toIbvSetupInformation();\n      const IbvAddress& localAddress = localNic.getIbvAddress();\n\n      transitionIbvQueuePairToReadyToReceive(\n          context_->getIbvLib(),\n          queuePairs_[localNicIdx][remoteNicIdx].queuePair,\n          localAddress,\n          setupInfo);\n      transitionIbvQueuePairToReadyToSend(\n          context_->getIbvLib(),\n          queuePairs_[localNicIdx][remoteNicIdx].queuePair);\n\n      queuePairs_[localNicIdx][remoteNicIdx].maximumMessageSize = std::min(\n          localAddress.maximumMessageSize, setupInfo.maximumMessageSize);\n    }\n  }\n\n  state_ = ESTABLISHED;\n  sendOps_.advanceAllOperations();\n  recvOps_.advanceAllOperations();\n}\n\nvoid ChannelImpl::sendImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  size_t localGpuIdx = cudaDeviceForPointer(\n      context_->getCudaLib(), buffer.unwrap<CudaBuffer>().ptr);\n  size_t localNicIdx = context_->getGpuToNicMapping()[localGpuIdx];\n\n  SendOpIter opIter = sendOps_.emplaceBack(\n      sequenceNumber,\n      buffer.unwrap<CudaBuffer>(),\n      length,\n      std::move(callback),\n      localGpuIdx,\n      localNicIdx);\n  opIter->event.record(buffer.unwrap<CudaBuffer>().stream);\n\n  sendOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceSendOperation(\n    SendOpIter opIter,\n    SendOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  sendOps_.attemptTransition(\n      
opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the descriptor control connection and read calls on the\n  // completion control connection.\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::READING_READY_TO_RECEIVE,\n      /*cond=*/!error_ && state_ == ESTABLISHED &&\n          prevOpState >= SendOperation::READING_READY_TO_RECEIVE,\n      /*actions=*/\n      {&ChannelImpl::writeDescriptor, &ChannelImpl::readReadyToReceive});\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::READING_READY_TO_RECEIVE,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/error_ && op.doneReadingReadyToReceive,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // This doesn't strictly need to go after the previous op, but it doesn't make\n  // sense to busy poll multiple events if only one of them is actually able to\n  // then make progress.\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::READING_READY_TO_RECEIVE,\n      /*to=*/SendOperation::WAITING_FOR_CUDA_EVENT,\n      /*cond=*/!error_ && op.doneReadingReadyToReceive &&\n          prevOpState >= SendOperation::SENDING_OVER_IB,\n      /*actions=*/{&ChannelImpl::waitForSendCudaEvent});\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::WAITING_FOR_CUDA_EVENT,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/error_ && op.doneWaitingForCudaEvent,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of send calls on InfiniBand queue pair.\n  sendOps_.attemptTransition(\n      opIter,\n      
/*from=*/SendOperation::WAITING_FOR_CUDA_EVENT,\n      /*to=*/SendOperation::SENDING_OVER_IB,\n      /*cond=*/!error_ && op.doneWaitingForCudaEvent &&\n          prevOpState >= SendOperation::SENDING_OVER_IB,\n      /*actions=*/{&ChannelImpl::sendOverIb});\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::SENDING_OVER_IB,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/op.numChunksBeingSent == 0,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n}\n\nvoid ChannelImpl::writeDescriptor(SendOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n  SendOperation& op = *opIter;\n\n  auto nopHolder = std::make_shared<NopHolder<Descriptor>>();\n  Descriptor& nopDescriptor = nopHolder->getObject();\n  nopDescriptor.originNicIdx = op.localNicIdx;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing descriptor (#\"\n             << op.sequenceNumber << \")\";\n  descriptorConnection_->write(\n      *nopHolder,\n      callbackWrapper_([sequenceNumber{op.sequenceNumber},\n                        nopHolder](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing descriptor (# \"\n                   << sequenceNumber << \")\";\n      }));\n}\n\nvoid ChannelImpl::readReadyToReceive(SendOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n  SendOperation& op = *opIter;\n\n  auto nopHolderIn = std::make_shared<NopHolder<ReadyToReceive>>();\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading ready-to-receive (#\"\n             << op.sequenceNumber << \")\";\n  readyToReceiveConnection_->read(\n      *nopHolderIn, callbackWrapper_([opIter, nopHolderIn](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done reading ready-to-receive (# \"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingReadyToReceive = true;\n        if (!impl.error_) {\n          const auto& readyToReceive = nopHolderIn->getObject();\n          opIter->remoteNicIdx = 
readyToReceive.destinationNicIdx;\n        }\n        impl.sendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::waitForSendCudaEvent(SendOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is waiting for CUDA event to send (#\"\n             << op.sequenceNumber << \")\";\n  context_->waitForCudaEvent(\n      op.event, callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done waiting for CUDA event to send (# \"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneWaitingForCudaEvent = true;\n        impl.sendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::sendOverIb(SendOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  IbvNic& localNic = context_->getIbvNic(op.localNicIdx);\n  IbvQueuePair& qp = queuePairs_[op.localNicIdx][op.remoteNicIdx].queuePair;\n  size_t chunkSize =\n      queuePairs_[op.localNicIdx][op.remoteNicIdx].maximumMessageSize;\n\n  // This could be VEEERY slow the first time we encounter the buffer, but the\n  // result will be cached and subsequent calls will be much faster.\n  IbvMemoryRegion& mr = localNic.registerMemory(op.buffer);\n\n  size_t numChunks = ceilOfRatio(op.length, chunkSize);\n  for (size_t chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {\n    IbvNic::SendInfo info;\n    info.addr =\n        reinterpret_cast<uint8_t*>(op.buffer.ptr) + chunkIdx * chunkSize;\n    info.length = std::min(op.length - chunkIdx * chunkSize, chunkSize);\n    info.lkey = mr->lkey;\n\n    TP_VLOG(6) << \"Channel \" << id_ << \" is sending chunk #\" << chunkIdx\n               << \" (out of \" << numChunks << \") of tensor #\"\n               << op.sequenceNumber << \" on QP \" << qp->qp_num;\n    localNic.postSend(\n        qp, info, callbackWrapper_([opIter, chunkIdx](ChannelImpl& impl) {\n          TP_VLOG(6) << 
\"Channel \" << impl.id_ << \" done sending chunk #\"\n                     << chunkIdx << \" of tensor #\" << opIter->sequenceNumber;\n          opIter->numChunksBeingSent--;\n          impl.sendOps_.advanceOperation(opIter);\n\n          impl.numSendsInFlight_--;\n          impl.tryCleanup();\n        }));\n    op.numChunksBeingSent++;\n    numSendsInFlight_++;\n  }\n}\n\nvoid ChannelImpl::callSendCallback(SendOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::recvImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  size_t localGpuIdx = cudaDeviceForPointer(\n      context_->getCudaLib(), buffer.unwrap<CudaBuffer>().ptr);\n  size_t localNicIdx = context_->getGpuToNicMapping()[localGpuIdx];\n\n  RecvOpIter opIter = recvOps_.emplaceBack(\n      sequenceNumber,\n      buffer.unwrap<CudaBuffer>(),\n      length,\n      std::move(callback),\n      localGpuIdx,\n      localNicIdx);\n  opIter->event.record(buffer.unwrap<CudaBuffer>().stream);\n\n  recvOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceRecvOperation(\n    RecvOpIter opIter,\n    RecvOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the descriptor control connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::READING_DESCRIPTOR,\n      /*cond=*/!error_ && state_ == ESTABLISHED &&\n          prevOpState >= 
RecvOperation::READING_DESCRIPTOR,\n      /*actions=*/{&ChannelImpl::readDescriptor});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_DESCRIPTOR,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneReadingDescriptor,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // This doesn't strictly need to go after the previous op, but it doesn't make\n  // sense to busy poll multiple events if only one of them is actually able to\n  // then make progress.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_DESCRIPTOR,\n      /*to=*/RecvOperation::WAITING_FOR_CUDA_EVENT,\n      /*cond=*/!error_ && op.doneReadingDescriptor &&\n          prevOpState >= RecvOperation::RECEIVING_OVER_IB,\n      /*actions=*/{&ChannelImpl::waitForRecvCudaEvent});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::WAITING_FOR_CUDA_EVENT,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneWaitingForCudaEvent,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of recv calls on InfiniBand queue pair and write calls on the completion\n  // control connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::WAITING_FOR_CUDA_EVENT,\n      /*to=*/RecvOperation::RECEIVING_OVER_IB,\n      /*cond=*/!error_ && op.doneWaitingForCudaEvent &&\n          prevOpState >= RecvOperation::RECEIVING_OVER_IB,\n      /*actions=*/{&ChannelImpl::recvOverIbAndWriteReadyToRecive});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::RECEIVING_OVER_IB,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/op.numChunksBeingReceived == 0,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n}\n\nvoid ChannelImpl::readDescriptor(RecvOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  
TP_VLOG(6) << \"Channel \" << id_ << \" is reading descriptor (#\"\n             << op.sequenceNumber << \")\";\n  auto nopHolderIn = std::make_shared<NopHolder<Descriptor>>();\n  descriptorConnection_->read(\n      *nopHolderIn, callbackWrapper_([opIter, nopHolderIn](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading descriptor (# \"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingDescriptor = true;\n        if (!impl.error_) {\n          Descriptor& nopDescriptor = nopHolderIn->getObject();\n          opIter->remoteNicIdx = nopDescriptor.originNicIdx;\n        }\n        impl.recvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::waitForRecvCudaEvent(RecvOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is waiting for CUDA event to recv (#\"\n             << op.sequenceNumber << \")\";\n  context_->waitForCudaEvent(\n      op.event, callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done waiting for CUDA event to recv (# \"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneWaitingForCudaEvent = true;\n        impl.recvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::recvOverIbAndWriteReadyToRecive(RecvOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  IbvNic& localNic = context_->getIbvNic(op.localNicIdx);\n  IbvQueuePair& qp = queuePairs_[op.localNicIdx][op.remoteNicIdx].queuePair;\n  size_t chunkSize =\n      queuePairs_[op.localNicIdx][op.remoteNicIdx].maximumMessageSize;\n\n  // This could be VEEERY slow the first time we encounter the buffer, but the\n  // result will be cached and subsequent calls will be much faster.\n  IbvMemoryRegion& mr = localNic.registerMemory(op.buffer);\n\n  size_t numChunks = ceilOfRatio(op.length, chunkSize);\n  
for (size_t chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {\n    IbvNic::RecvInfo info;\n    info.addr =\n        reinterpret_cast<uint8_t*>(op.buffer.ptr) + chunkIdx * chunkSize;\n    info.length = std::min(op.length - chunkIdx * chunkSize, chunkSize);\n    info.lkey = mr->lkey;\n\n    TP_VLOG(6) << \"Channel \" << id_ << \" is receiving chunk #\" << chunkIdx\n               << \" (out of \" << numChunks << \") of tensor #\"\n               << op.sequenceNumber << \" on QP \" << qp->qp_num;\n    localNic.postRecv(\n        qp, info, callbackWrapper_([opIter, chunkIdx](ChannelImpl& impl) {\n          TP_VLOG(6) << \"Channel \" << impl.id_ << \" done receiving chunk #\"\n                     << chunkIdx << \" of tensor #\" << opIter->sequenceNumber;\n          opIter->numChunksBeingReceived--;\n          impl.recvOps_.advanceOperation(opIter);\n\n          impl.numRecvsInFlight_--;\n          impl.tryCleanup();\n        }));\n    op.numChunksBeingReceived++;\n    numRecvsInFlight_++;\n  }\n\n  auto nopHolderOut = std::make_shared<NopHolder<ReadyToReceive>>();\n  ReadyToReceive& nopReadyToReceive = nopHolderOut->getObject();\n  nopReadyToReceive.destinationNicIdx = op.localNicIdx;\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing ready-to-receive (#\"\n             << op.sequenceNumber << \")\";\n  readyToReceiveConnection_->write(\n      *nopHolderOut,\n      callbackWrapper_([sequenceNumber{opIter->sequenceNumber},\n                        nopHolderOut](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done writing ready-to-receive (#\" << sequenceNumber\n                   << \")\";\n      }));\n}\n\nvoid ChannelImpl::callRecvCallback(RecvOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::handleErrorImpl() {\n  sendOps_.advanceAllOperations();\n 
 recvOps_.advanceAllOperations();\n\n  for (size_t localNicIdx = 0; localNicIdx < numLocalNics_; localNicIdx++) {\n    for (size_t remoteNicIdx = 0; remoteNicIdx < numRemoteNics_;\n         remoteNicIdx++) {\n      transitionIbvQueuePairToError(\n          context_->getIbvLib(),\n          queuePairs_[localNicIdx][remoteNicIdx].queuePair);\n    }\n  }\n\n  tryCleanup();\n\n  descriptorConnection_->close();\n  readyToReceiveConnection_->close();\n}\n\nvoid ChannelImpl::tryCleanup() {\n  TP_DCHECK(context_->inLoop());\n\n  if (error_) {\n    if (numSendsInFlight_ == 0 && numRecvsInFlight_ == 0) {\n      cleanup();\n    } else {\n      TP_VLOG(9) << \"Channel \" << id_\n                 << \" cannot proceed to cleanup because it has \"\n                 << numSendsInFlight_ << \" pending send requests and \"\n                 << numRecvsInFlight_ << \" pending recv requests\";\n    }\n  }\n}\n\nvoid ChannelImpl::cleanup() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(8) << \"Channel \" << id_ << \" is cleaning up\";\n\n  queuePairs_.clear();\n\n  context_->unenroll(*this);\n}\n\n} // namespace cuda_gdr\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/channel_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <list>\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/ibv.h>\n#include <tensorpipe/common/state_machine.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_gdr {\n\nclass ContextImpl;\n\n// Ideally we would use NOP_EXTERNAL_STRUCTURE instead of defining the following\n// two structs, but we tried so in D26460332 and failed because a bug in GCC 5.5\n// (and probably other versions) requires every nop structure used inside a\n// std::vector to have an explicit non-defaulted default constructor, which is\n// something we cannot do with NOP_EXTERNAL_STRUCTURE and forces us to re-define\n// separate structs.\n\n// Replicate the IbvLib::gid struct so we can serialize it with libnop.\nstruct NopIbvGid {\n  uint64_t subnetPrefix;\n  uint64_t interfaceId;\n  NOP_STRUCTURE(NopIbvGid, subnetPrefix, interfaceId);\n\n  void fromIbvGid(const IbvLib::gid& globalIdentifier) {\n    subnetPrefix = globalIdentifier.global.subnet_prefix;\n    interfaceId = globalIdentifier.global.interface_id;\n  }\n\n  IbvLib::gid toIbvGid() const {\n    IbvLib::gid globalIdentifier;\n    globalIdentifier.global.subnet_prefix = subnetPrefix;\n    globalIdentifier.global.interface_id = interfaceId;\n    return globalIdentifier;\n  }\n};\n\n// Replicate the IbvSetupInformation struct so we can serialize it with libnop.\nstruct NopIbvSetupInformation {\n  // This pointless constructor is needed to work around a bug in GCC 5.5 
(and\n  // possibly other versions). It appears to be needed in the nop types that\n  // are used inside std::vectors.\n  NopIbvSetupInformation() {}\n\n  uint32_t localIdentifier;\n  NopIbvGid globalIdentifier;\n  uint32_t queuePairNumber;\n  IbvLib::mtu maximumTransmissionUnit;\n  uint32_t maximumMessageSize;\n  NOP_STRUCTURE(\n      NopIbvSetupInformation,\n      localIdentifier,\n      globalIdentifier,\n      queuePairNumber,\n      maximumTransmissionUnit,\n      maximumMessageSize);\n\n  void fromIbvSetupInformation(const IbvSetupInformation& setupInfo) {\n    localIdentifier = setupInfo.localIdentifier;\n    globalIdentifier.fromIbvGid(setupInfo.globalIdentifier);\n    queuePairNumber = setupInfo.queuePairNumber;\n    maximumTransmissionUnit = setupInfo.maximumTransmissionUnit;\n    maximumMessageSize = setupInfo.maximumMessageSize;\n  }\n\n  IbvSetupInformation toIbvSetupInformation() const {\n    IbvSetupInformation setupInfo;\n    setupInfo.localIdentifier = localIdentifier;\n    setupInfo.globalIdentifier = globalIdentifier.toIbvGid();\n    setupInfo.queuePairNumber = queuePairNumber;\n    setupInfo.maximumTransmissionUnit = maximumTransmissionUnit;\n    setupInfo.maximumMessageSize = maximumMessageSize;\n    return setupInfo;\n  }\n};\n\nstruct SendOperation {\n  enum State {\n    UNINITIALIZED,\n    READING_READY_TO_RECEIVE,\n    WAITING_FOR_CUDA_EVENT,\n    SENDING_OVER_IB,\n    FINISHED\n  };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingReadyToReceive{false};\n  bool doneWaitingForCudaEvent{false};\n  uint64_t numChunksBeingSent{0};\n\n  // Arguments at creation\n  const CudaBuffer buffer;\n  const size_t length;\n  const size_t localNicIdx;\n  TSendCallback callback;\n\n  // Other stuff\n  CudaEvent event;\n  size_t remoteNicIdx;\n\n  SendOperation(\n      CudaBuffer buffer,\n      size_t length,\n      TSendCallback callback,\n      size_t 
localGpuIdx,\n      size_t localNicIdx)\n      : buffer(buffer),\n        length(length),\n        localNicIdx(localNicIdx),\n        callback(std::move(callback)),\n        event(localGpuIdx) {}\n};\n\nstruct RecvOperation {\n  enum State {\n    UNINITIALIZED,\n    READING_DESCRIPTOR,\n    WAITING_FOR_CUDA_EVENT,\n    RECEIVING_OVER_IB,\n    FINISHED\n  };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingDescriptor{false};\n  bool doneWaitingForCudaEvent{false};\n  uint64_t numChunksBeingReceived{0};\n\n  // Arguments at creation\n  const CudaBuffer buffer;\n  const size_t length;\n  const size_t localNicIdx;\n  TSendCallback callback;\n\n  // Other stuff\n  size_t remoteNicIdx;\n  CudaEvent event;\n\n  RecvOperation(\n      CudaBuffer buffer,\n      size_t length,\n      TSendCallback callback,\n      size_t deviceIdx,\n      size_t localNicIdx)\n      : buffer(buffer),\n        length(length),\n        localNicIdx(localNicIdx),\n        callback(std::move(callback)),\n        event(deviceIdx) {}\n};\n\n// First \"round\" of handshake.\nstruct HandshakeNumNics {\n  size_t numNics;\n  NOP_STRUCTURE(HandshakeNumNics, numNics);\n};\n\n// Second \"round\" of handshake.\nstruct HandshakeSetupInfo {\n  std::vector<std::vector<NopIbvSetupInformation>> setupInfo;\n  NOP_STRUCTURE(HandshakeSetupInfo, setupInfo);\n};\n\n// From sender to receiver (through pipe).\nstruct Descriptor {\n  size_t originNicIdx;\n  NOP_STRUCTURE(Descriptor, originNicIdx);\n};\n\n// From receiver to sender (through channel's connection).\nstruct ReadyToReceive {\n  size_t destinationNicIdx;\n  NOP_STRUCTURE(ReadyToReceive, destinationNicIdx);\n};\n\nclass ChannelImpl final\n    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  ChannelImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      
std::shared_ptr<transport::Connection> descriptorConnection,\n      std::shared_ptr<transport::Connection> readyToReceiveConnection);\n\n protected:\n  // Implement the entry points called by ChannelImplBoilerplate.\n  void initImplFromLoop() override;\n  void sendImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) override;\n  void recvImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TRecvCallback callback) override;\n  void handleErrorImpl() override;\n\n private:\n  const std::shared_ptr<transport::Connection> descriptorConnection_;\n  const std::shared_ptr<transport::Connection> readyToReceiveConnection_;\n\n  enum State {\n    INITIALIZING = 1,\n    WAITING_FOR_HANDSHAKE_NUM_NICS,\n    WAITING_FOR_HANDSHAKE_SETUP_INFO,\n    ESTABLISHED,\n  };\n  State state_{INITIALIZING};\n\n  std::vector<size_t> localGpuToNic_;\n  size_t numLocalNics_{0};\n  size_t numRemoteNics_{0};\n\n  // This struct is used to bundle the queue pair with some additional metadata.\n  struct QueuePair {\n    IbvQueuePair queuePair;\n    // The CUDA GDR channel could be asked to transmit arbitrarily large tensors\n    // and in principle it could directly forward them to the NIC as they are.\n    // However IB NICs have limits on the size of each message. 
Hence we\n    // determine these sizes, one per queue pair (as the minimum of the local\n    // and remote sizes) and then split our tensors in chunks of that size.\n    uint32_t maximumMessageSize;\n  };\n  std::vector<std::vector<QueuePair>> queuePairs_;\n\n  OpsStateMachine<ChannelImpl, SendOperation> sendOps_{\n      *this,\n      &ChannelImpl::advanceSendOperation};\n  using SendOpIter = decltype(sendOps_)::Iter;\n  OpsStateMachine<ChannelImpl, RecvOperation> recvOps_{\n      *this,\n      &ChannelImpl::advanceRecvOperation};\n  using RecvOpIter = decltype(recvOps_)::Iter;\n\n  uint32_t numSendsInFlight_{0};\n  uint32_t numRecvsInFlight_{0};\n\n  // Callbacks for the initial handshake phase.\n  void onReadHandshakeNumNics(const HandshakeNumNics& nopHandshakeNumNics);\n  void onReadHandshakeSetupInfo(\n      const HandshakeSetupInfo& nopHandshakeSetupInfo);\n\n  // Cleanup methods for teardown.\n  void tryCleanup();\n  void cleanup();\n\n  // State machines for send and recv ops.\n  void advanceSendOperation(\n      SendOpIter opIter,\n      SendOperation::State prevOpState);\n  void advanceRecvOperation(\n      RecvOpIter opIter,\n      RecvOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For send operations:\n  void writeDescriptor(SendOpIter opIter);\n  void readReadyToReceive(SendOpIter opIter);\n  void waitForSendCudaEvent(SendOpIter opIter);\n  void sendOverIb(SendOpIter opIter);\n  void callSendCallback(SendOpIter opIter);\n  // For recv operations:\n  void readDescriptor(RecvOpIter opIter);\n  void waitForRecvCudaEvent(RecvOpIter opIter);\n  void recvOverIbAndWriteReadyToRecive(RecvOpIter opIter);\n  void callRecvCallback(RecvOpIter opIter);\n};\n\n} // namespace cuda_gdr\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/constants.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstdint>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_gdr {\n\nnamespace {\n\n// We should probably allow these to be user-configured. But, for now, we'll set\n// them to the lowest value they can have, the rationale being that this way\n// they will always be valid.\nconstexpr uint8_t kPortNum = 1;\nconstexpr uint8_t kGlobalIdentifierIndex = 0;\n\n// FIXME Instead of hardcoding the next three values, we could use\n// ibv_query_device to obtain max_cqe, max_qp_wr and max_srq_wr and deduce from\n// them the maximum allowed values for these parameters.\n\nconstexpr uint32_t kNumRecvs = 1024;\nconstexpr uint32_t kNumSends = 1024;\n\n// How many elements the completion queue should be able to hold. These elements\n// will be either the completed receive requests of the SRQ, or the completed\n// send requests from a connection's queue pair. We can bound the former value\n// but not the latter, so we try to add some margin.\nconstexpr int kCompletionQueueSize = kNumRecvs + kNumSends;\n\n// How many work completions to poll from the completion queue at each reactor\n// iteration.\nconstexpr int kNumPolledWorkCompletions = 32;\n\n} // namespace\n\n} // namespace cuda_gdr\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_gdr/context_impl.h>\n\n#include <array>\n#include <climits>\n#include <cstdlib>\n#include <functional>\n#include <string>\n#include <tuple>\n#include <type_traits>\n#include <unordered_map>\n#include <unordered_set>\n#include <utility>\n#include <vector>\n\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <unistd.h>\n\n#include <cuda.h>\n#include <cuda_runtime.h>\n\n#include <tensorpipe/channel/cuda_gdr/channel_impl.h>\n#include <tensorpipe/channel/cuda_gdr/error.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error_macros.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_gdr {\n\nnamespace {\n\n// NOTE: This is an incomplete implementation of C++17's `std::apply`.\n// It's intended to only work for methods of IbvNic.\ntemplate <class TMethod, class TArgsTuple, std::size_t... I>\nauto applyFuncImpl(\n    IbvNic& subject,\n    TMethod&& method,\n    TArgsTuple&& args,\n    std::index_sequence<I...> /* unused */) {\n  return ((subject).*(method))(std::get<I>(std::forward<TArgsTuple>(args))...);\n}\n\ntemplate <class TMethod, class TArgsTuple>\nauto applyFunc(IbvNic& subject, TMethod&& method, TArgsTuple&& args) {\n  return applyFuncImpl(\n      subject,\n      std::forward<TMethod>(method),\n      std::forward<TArgsTuple>(args),\n      std::make_index_sequence<\n          std::tuple_size<std::remove_reference_t<TArgsTuple>>::value>{});\n}\n\n// We can only pass CUDA pointers to InfiniBand (for example when registering\n// some memory) if InfiniBand \"knows about\" CUDA. 
Those pointers refer to the\n// section of the process's virtual address space that is being used by CUDA to\n// represent device memory (as part of CUDA's unified memory approach). Thus\n// InfiniBand needs to talk to CUDA to translate those pointers to physical PCIe\n// hardware addresses.\n// This is achieved by CUDA providing a so-called \"peer memory client\" and\n// registering it with the InfiniBand kernel module. The peer memory client is\n// itself a kernel module, see https://github.com/Mellanox/nv_peer_memory.\n// The \"catch\" is that the whole \"peer memory client\" system is not part of the\n// official Linux InfiniBand. It's provided by a Mellanox extension, and it's\n// part of their \"OpenFabrics Enterprise Distribution\" (MLNX_OFED), see\n// https://www.mellanox.com/products/infiniband-drivers/linux/mlnx_ofed. (In\n// particular, on Ubuntu, this seems to be provided by the mlnx-ofed-kernel-dkms\n// package). Note that this difference between \"vanilla\" InfiniBand and OFED is\n// only in kernel space; from our perspective the two have the same API. 
Also\n// note that Mellanox has tried at least a couple of times to upstream this, but\n// apparently without success:\n// https://lore.kernel.org/linux-rdma/1412602019-30659-1-git-send-email-yishaih@mellanox.com/\n// https://lore.kernel.org/linux-rdma/1455207177-11949-1-git-send-email-artemyko@mellanox.com/\n// The check we use to verify if the peer memory client is active is the same as\n// NCCL's one, see\n// https://github.com/NVIDIA/nccl/blob/ca8485b0d01ca6dfa02f4454932011e68b461175/src/transport/net_ib.cc#L216-L230\n// Whereas TensorFlow does it slightly differently, see\n// https://github.com/tensorflow/networking/blob/671e2548b602f93a6c6502432b8bc131b5cc4914/tensorflow_networking/gdr/gdr_memory_manager.cc#L43-L60\nstatic std::string kNvMemModulePath =\n    \"/sys/kernel/mm/memory_peers/nv_mem/version\";\nstatic std::string kNvidiaPeermemModulePath =\n    \"/sys/kernel/mm/memory_peers/nvidia-peermem/version\";\n\nbool isNvidiaPeerMemoryClientActive() {\n  int rv1 = ::access(kNvMemModulePath.c_str(), F_OK);\n  int rv2 = ::access(kNvidiaPeermemModulePath.c_str(), F_OK);\n  return rv1 >= 0 || rv2 >= 0;\n}\n\n// The PCI topology is a tree, with the root being the host bridge, the leaves\n// being the devices, and the other nodes being switches. We want to match each\n// GPU to the InfiniBand NIC with which it shares the longest \"prefix\" in this\n// tree, as that will route the data transfer away from the most \"central\"\n// switches and from the host bridge. We extract the \"path\" of a device in the\n// PCI tree by obtaining its \"canonical\" path in Linux's sysfs, which contains\n// one component for each other device that is traversed. 
The format of such a\n// path is /sys/devices/pci0123:45(/0123:45:67.8)+\n// See https://www.kernel.org/doc/ols/2005/ols2005v1-pages-321-334.pdf for more\n// info on sysfs.\n\nconst std::string kPciPathPrefix = \"/sys/devices/pci\";\n\nstd::string getPciPathForIbvNic(const std::string& nicName) {\n  std::array<char, PATH_MAX> pciPath;\n  char* rv = ::realpath(\n      (\"/sys/class/infiniband/\" + nicName + \"/device\").c_str(), pciPath.data());\n  TP_THROW_SYSTEM_IF(rv == nullptr, errno);\n  TP_DCHECK(rv == pciPath.data());\n\n  std::string res(pciPath.data());\n  TP_DCHECK(res.substr(0, kPciPathPrefix.size()) == kPciPathPrefix)\n      << \"Bad PCI path for InfiniBand NIC \" << nicName << \": \" << res;\n  return res;\n}\n\nstd::string getPciPathForGpu(int gpuIdx) {\n  // The CUDA documentation says the ID will consist of a domain (16 bits), a\n  // bus (8 bits), a device (5 bits) and a function (3 bits). When represented\n  // as hex, including the separators and the null terminator, this takes up 13\n  // bytes. 
However NCCL seems to suggests that sometimes the domain takes twice\n  // that size, and hence 17 bytes are necessary.\n  // https://github.com/NVIDIA/nccl/blob/c6dbdb00849027b4e2c277653cbef53729f7213d/src/misc/utils.cc#L49-L53\n  std::array<char, 17> pciDeviceId;\n  TP_CUDA_CHECK(\n      cudaDeviceGetPCIBusId(pciDeviceId.data(), pciDeviceId.size(), gpuIdx));\n\n  // Fun fact: CUDA seems to format hex letters as uppercase, but Linux's sysfs\n  // expects them as lowercase.\n  for (char& c : pciDeviceId) {\n    if ('A' <= c && c <= 'F') {\n      c = c - 'A' + 'a';\n    }\n  }\n\n  std::array<char, PATH_MAX> pciPath;\n  char* rv = ::realpath(\n      (\"/sys/bus/pci/devices/\" + std::string(pciDeviceId.data())).c_str(),\n      pciPath.data());\n  TP_THROW_SYSTEM_IF(rv == nullptr, errno);\n  TP_DCHECK(rv == pciPath.data());\n\n  std::string res(pciPath.data());\n  TP_DCHECK(res.substr(0, kPciPathPrefix.size()) == kPciPathPrefix)\n      << \"Bad PCI path for GPU #\" << gpuIdx << \": \" << res;\n  return res;\n}\n\nsize_t commonPrefixLength(const std::string& a, const std::string& b) {\n  // The length of the longest common prefix is the index of the first char on\n  // which the two strings differ.\n  size_t maxLength = std::min(a.size(), b.size());\n  for (size_t idx = 0; idx < maxLength; idx++) {\n    if (a[idx] != b[idx]) {\n      return idx;\n    }\n  }\n  return maxLength;\n}\n\nstd::vector<std::string> matchGpusToIbvNics(\n    IbvLib& ibvLib,\n    IbvDeviceList& deviceList) {\n  struct NicInfo {\n    std::string name;\n    std::string pciPath;\n  };\n  std::vector<NicInfo> nicInfos;\n  for (size_t deviceIdx = 0; deviceIdx < deviceList.size(); deviceIdx++) {\n    IbvLib::device& device = deviceList[deviceIdx];\n    std::string deviceName(TP_CHECK_IBV_PTR(ibvLib.get_device_name(&device)));\n    std::string pciPath = getPciPathForIbvNic(deviceName);\n    TP_VLOG(5) << \"Resolved InfiniBand NIC \" << deviceName << \" to PCI path \"\n               << pciPath;\n    
nicInfos.push_back(NicInfo{std::move(deviceName), std::move(pciPath)});\n  }\n\n  int numGpus;\n  TP_CUDA_CHECK(cudaGetDeviceCount(&numGpus));\n\n  std::vector<std::string> gpuIdxToIbvNicName;\n  for (int gpuIdx = 0; gpuIdx < numGpus; gpuIdx++) {\n    std::string gpuPciPath = getPciPathForGpu(gpuIdx);\n    TP_VLOG(5) << \"Resolved GPU #\" << gpuIdx << \" to PCI path \" << gpuPciPath;\n    ssize_t bestMatchLength = -1;\n    const std::string* bestMatchName = nullptr;\n    for (const auto& nicInfo : nicInfos) {\n      ssize_t matchLength = commonPrefixLength(gpuPciPath, nicInfo.pciPath);\n      if (matchLength > bestMatchLength) {\n        bestMatchLength = matchLength;\n        bestMatchName = &nicInfo.name;\n      }\n    }\n    TP_DCHECK_GE(bestMatchLength, 0);\n    TP_DCHECK(bestMatchName != nullptr);\n    gpuIdxToIbvNicName.push_back(*bestMatchName);\n  }\n\n  return gpuIdxToIbvNicName;\n}\n\n// In GpuDirect, the way an InfiniBand NIC accesses the GPU's memory is by\n// issuing a PCIe read to some address within the GPU's \"base address register\"\n// (BAR), i.e., a slice of the \"physical\" PCIe address space that belongs to the\n// GPU. BARs in principle provide only \"windows\" into a device's memory, and\n// could be re-mapped over time. When a CUDA allocation is registered on\n// InfiniBand, its backing memory is mapped into the BAR and its address is\n// given to the InfiniBand driver. That mapping must remain in place until the\n// registration is destroyed. See\n// https://docs.nvidia.com/cuda/gpudirect-rdma/index.html#how-gpudirect-rdma-works.\n// CUDA GDR doesn't work well with that, because:\n// - It attempts to register the entire user allocation with InfiniBand, hence\n//   allocations that exceed the BAR's size can never be transferred.\n// - It \"caches\" (or \"leaks\") the InfiniBand registration, because creating it\n//   is expensive, so that this can be done once and then reused. 
This means\n//   that even if each tensor that is sent is smaller than the BAR, we'd start\n//   seeing failures if their cumulative size exceeded the one of the BAR.\n// On some GPUs though the BAR size spans the entire GPU memory. In such cases\n// what CUDA GDR is doing should be \"safe\". In all other cases, however, it\n// isn't, and it's better to thus disable CUDA GDR entirely in these scenarios,\n// so that users end up using a fully functioning (but slower) CUDA channel.\n// There are multiple BARs for each GPU, but from an experimental investigation\n// it seems the one that maps to the device's memory is BAR1. The programmatic\n// way that the Linux kernel offers to access information about PCIe and its\n// BARs is through sysfs. See\n// https://www.kernel.org/doc/html/latest/PCI/sysfs-pci.html.\n\nsize_t getBar1SizeOfGpu(int gpuIdx) {\n  std::string pciPath = getPciPathForGpu(gpuIdx);\n  pciPath += \"/resource1\";\n\n  struct stat bar1Stats;\n  int rv = ::stat(pciPath.c_str(), &bar1Stats);\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n\n  return bar1Stats.st_size;\n}\n\nbool allGpusHaveEnoughBar1Size() {\n  int numGpus;\n  TP_CUDA_CHECK(cudaGetDeviceCount(&numGpus));\n  for (int gpuIdx = 0; gpuIdx < numGpus; gpuIdx++) {\n    cudaDeviceProp gpuProps;\n    TP_CUDA_CHECK(cudaGetDeviceProperties(&gpuProps, gpuIdx));\n    size_t memorySize = gpuProps.totalGlobalMem;\n    size_t bar1Size = getBar1SizeOfGpu(gpuIdx);\n    TP_VLOG(5) << \"GPU #\" << gpuIdx << \" has \" << memorySize\n               << \" bytes of memory and the size of its PCIe BAR1 is \"\n               << bar1Size << \" bytes\";\n    if (bar1Size < memorySize) {\n      return false;\n    }\n  }\n  return true;\n}\n\n} // namespace\n\nIbvNic::IbvNic(\n    std::string name,\n    IbvLib::device& device,\n    const IbvLib& ibvLib,\n    const CudaLib& cudaLib)\n    : name_(std::move(name)), cudaLib_(cudaLib), ibvLib_(ibvLib) {\n  ctx_ = createIbvContext(ibvLib_, device);\n  pd_ = 
createIbvProtectionDomain(ibvLib_, ctx_);\n  cq_ = createIbvCompletionQueue(\n      ibvLib_,\n      ctx_,\n      kCompletionQueueSize,\n      /*cq_context=*/nullptr,\n      /*channel=*/nullptr,\n      /*comp_vector=*/0);\n  addr_ = makeIbvAddress(ibvLib_, ctx_, kPortNum, kGlobalIdentifierIndex);\n}\n\nbool IbvNic::pollOnce() {\n  std::array<IbvLib::wc, kNumPolledWorkCompletions> wcs;\n  auto rv = ibvLib_.poll_cq(cq_.get(), wcs.size(), wcs.data());\n\n  if (rv == 0) {\n    return false;\n  }\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n\n  int numSends = 0;\n  int numRecvs = 0;\n  for (int wcIdx = 0; wcIdx < rv; wcIdx++) {\n    IbvLib::wc& wc = wcs[wcIdx];\n\n    TP_VLOG(6) << \"Channel context \" << id_ << \" got work completion on device \"\n               << name_ << \" for request \" << wc.wr_id << \" for QP \"\n               << wc.qp_num << \" with status \"\n               << ibvLib_.wc_status_str(wc.status) << \" and opcode \"\n               << ibvWorkCompletionOpcodeToStr(wc.opcode)\n               << \" (byte length: \" << wc.byte_len << \")\";\n\n    auto iter = requestsInFlight_.find(wc.wr_id);\n    TP_THROW_ASSERT_IF(iter == requestsInFlight_.end())\n        << \"Got work completion with unknown ID \" << wc.wr_id;\n\n    IbvLib::wc_opcode opcode = std::move(std::get<0>(iter->second));\n    std::function<void(const Error&)> cb = std::move(std::get<1>(iter->second));\n    requestsInFlight_.erase(iter);\n\n    if (wc.status != IbvLib::WC_SUCCESS) {\n      cb(TP_CREATE_ERROR(IbvError, ibvLib_.wc_status_str(wc.status)));\n    } else {\n      cb(Error::kSuccess);\n    }\n\n    switch (opcode) {\n      case IbvLib::WC_RECV:\n        numRecvs++;\n        break;\n      case IbvLib::WC_SEND:\n        numSends++;\n        break;\n      default:\n        TP_THROW_ASSERT() << \"Unknown opcode: \" << opcode;\n    }\n  }\n\n  numAvailableSendSlots_ += numSends;\n  while (!sendsWaitingForSlots_.empty() && numAvailableSendSlots_ > 0) {\n    applyFunc(\n        *this, 
&IbvNic::postSend, std::move(sendsWaitingForSlots_.front()));\n    sendsWaitingForSlots_.pop_front();\n  }\n\n  numAvailableRecvSlots_ += numRecvs;\n  while (!recvsWaitingForSlots_.empty() && numAvailableRecvSlots_ > 0) {\n    applyFunc(\n        *this, &IbvNic::postRecv, std::move(recvsWaitingForSlots_.front()));\n    recvsWaitingForSlots_.pop_front();\n  }\n\n  return true;\n}\n\nvoid IbvNic::postSend(\n    IbvQueuePair& qp,\n    SendInfo info,\n    std::function<void(const Error&)> cb) {\n  if (numAvailableSendSlots_ > 0) {\n    IbvLib::sge list;\n    list.addr = reinterpret_cast<uint64_t>(info.addr);\n    list.length = info.length;\n    list.lkey = info.lkey;\n\n    IbvLib::send_wr wr;\n    std::memset(&wr, 0, sizeof(wr));\n    wr.wr_id = nextRequestId_++;\n    wr.sg_list = &list;\n    wr.num_sge = 1;\n    wr.opcode = IbvLib::WR_SEND;\n\n    IbvLib::send_wr* badWr = nullptr;\n    TP_VLOG(6) << \"Channel context \" << id_ << \" posting send on device \"\n               << name_ << \" for QP \" << qp->qp_num;\n    TP_CHECK_IBV_INT(ibvLib_.post_send(qp.get(), &wr, &badWr));\n    TP_THROW_ASSERT_IF(badWr != nullptr);\n    numAvailableSendSlots_--;\n    requestsInFlight_.emplace(\n        wr.wr_id, std::make_tuple(IbvLib::WC_SEND, std::move(cb)));\n  } else {\n    TP_VLOG(6) << \"Channel context \" << id_ << \" queueing up send on device \"\n               << name_ << \" for QP \" << qp->qp_num;\n    sendsWaitingForSlots_.emplace_back(qp, info, std::move(cb));\n  }\n}\n\nvoid IbvNic::postRecv(\n    IbvQueuePair& qp,\n    RecvInfo info,\n    std::function<void(const Error&)> cb) {\n  if (numAvailableRecvSlots_ > 0) {\n    IbvLib::sge list;\n    list.addr = reinterpret_cast<uint64_t>(info.addr);\n    list.length = info.length;\n    list.lkey = info.lkey;\n\n    IbvLib::recv_wr wr;\n    std::memset(&wr, 0, sizeof(wr));\n    wr.wr_id = nextRequestId_++;\n    wr.sg_list = &list;\n    wr.num_sge = 1;\n\n    IbvLib::recv_wr* badWr = nullptr;\n    TP_VLOG(6) << \"Channel 
context \" << id_ << \" posting recv on device \"\n               << name_ << \" for QP \" << qp->qp_num;\n    TP_CHECK_IBV_INT(ibvLib_.post_recv(qp.get(), &wr, &badWr));\n    TP_THROW_ASSERT_IF(badWr != nullptr);\n    numAvailableRecvSlots_--;\n    requestsInFlight_.emplace(\n        wr.wr_id, std::make_tuple(IbvLib::WC_RECV, std::move(cb)));\n  } else {\n    TP_VLOG(6) << \"Channel context \" << id_ << \" queueing up recv on device \"\n               << name_ << \" for QP \" << qp->qp_num;\n    recvsWaitingForSlots_.emplace_back(qp, info, std::move(cb));\n  }\n}\n\nIbvMemoryRegion& IbvNic::registerMemory(CudaBuffer buffer) {\n  // FIXME Instead of re-querying the device, have the caller provide it.\n  CudaDeviceGuard guard(cudaDeviceForPointer(cudaLib_, buffer.ptr));\n\n  CUdeviceptr basePtr;\n  size_t allocSize;\n  TP_CUDA_DRIVER_CHECK(\n      cudaLib_,\n      cudaLib_.memGetAddressRange(\n          &basePtr, &allocSize, reinterpret_cast<CUdeviceptr>(buffer.ptr)));\n\n  unsigned long long bufferId;\n  TP_CUDA_DRIVER_CHECK(\n      cudaLib_,\n      cudaLib_.pointerGetAttribute(\n          &bufferId, CU_POINTER_ATTRIBUTE_BUFFER_ID, basePtr));\n\n  auto iter = memoryRegions_.find(bufferId);\n  if (iter != memoryRegions_.end()) {\n    return iter->second;\n  }\n  std::tie(iter, std::ignore) = memoryRegions_.emplace(\n      bufferId,\n      createIbvMemoryRegion(\n          ibvLib_,\n          pd_,\n          reinterpret_cast<void*>(basePtr),\n          allocSize,\n          IbvLib::ACCESS_LOCAL_WRITE));\n  return iter->second;\n}\n\nbool IbvNic::readyToClose() const {\n  return requestsInFlight_.empty();\n}\n\nvoid IbvNic::setId(std::string id) {\n  id_ = std::move(id);\n}\n\nstd::shared_ptr<ContextImpl> ContextImpl::create(\n    optional<std::vector<std::string>> gpuIdxToNicName) {\n  Error error;\n\n  CudaLib cudaLib;\n  std::tie(error, cudaLib) = CudaLib::create();\n  // FIXME Instead of throwing away the error and setting a bool, we should have\n  // a way to set 
the context in an error state, and use that for viability.\n  if (error) {\n    TP_VLOG(5)\n        << \"CUDA GDR channel is not viable because libcuda could not be loaded: \"\n        << error.what();\n    return nullptr;\n  }\n\n  IbvLib ibvLib;\n  std::tie(error, ibvLib) = IbvLib::create();\n  // FIXME Instead of throwing away the error and setting a bool, we should have\n  // a way to set the context in an error state, and use that for viability.\n  if (error) {\n    TP_VLOG(5)\n        << \"CUDA GDR channel is not viable because libibverbs could not be loaded: \"\n        << error.what();\n    return nullptr;\n  }\n\n  if (!isNvidiaPeerMemoryClientActive()) {\n    TP_VLOG(5)\n        << \"CUDA GDR channel is not viable because the nv_peer_mem kernel module isn't active\";\n    return nullptr;\n  }\n\n  IbvDeviceList deviceList;\n  std::tie(error, deviceList) = IbvDeviceList::create(ibvLib);\n  if (error && error.isOfType<SystemError>() &&\n      error.castToType<SystemError>()->errorCode() == ENOSYS) {\n    TP_VLOG(5)\n        << \"CUDA GDR channel couldn't get list of InfiniBand devices because the kernel module isn't \"\n        << \"loaded\";\n    return nullptr;\n  }\n  TP_THROW_ASSERT_IF(error)\n      << \"Couldn't get list of InfiniBand devices: \" << error.what();\n  if (deviceList.size() == 0) {\n    TP_VLOG(5)\n        << \"CUDA GDR channel is not viable because it couldn't find any InfiniBand NICs\";\n    return nullptr;\n  }\n\n  // FIXME In principle we could just exclude the GPUs that violate this check\n  // but keep working with the other ones (if any).\n  if (!allGpusHaveEnoughBar1Size()) {\n    TP_VLOG(5)\n        << \"CUDA GDR channel is not viable because some GPUs don't have a large enough PCIe BAR1 size\";\n    return nullptr;\n  }\n\n  std::unordered_map<Device, std::string> deviceDescriptors;\n  for (const auto& device : getCudaDevices(cudaLib)) {\n    deviceDescriptors[device] = \"*\";\n  }\n\n  return std::make_shared<ContextImpl>(\n   
   std::move(deviceDescriptors),\n      std::move(cudaLib),\n      std::move(ibvLib),\n      std::move(deviceList),\n      std::move(gpuIdxToNicName));\n}\n\nContextImpl::ContextImpl(\n    std::unordered_map<Device, std::string> deviceDescriptors,\n    CudaLib cudaLib,\n    IbvLib ibvLib,\n    IbvDeviceList deviceList,\n    optional<std::vector<std::string>> gpuIdxToNicName)\n    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(\n          std::move(deviceDescriptors)),\n      cudaLib_(std::move(cudaLib)),\n      ibvLib_(std::move(ibvLib)) {\n  std::vector<std::string> actualGpuIdxToNicName;\n  if (gpuIdxToNicName.has_value()) {\n    int numGpus;\n    TP_CUDA_CHECK(cudaGetDeviceCount(&numGpus));\n    TP_THROW_ASSERT_IF(numGpus != gpuIdxToNicName->size())\n        << \"The mapping from GPUs to InfiniBand NICs contains an unexpected \"\n        << \"number of items: found \" << gpuIdxToNicName->size() << \", expected \"\n        << numGpus;\n\n    actualGpuIdxToNicName = std::move(gpuIdxToNicName.value());\n  } else {\n    actualGpuIdxToNicName = matchGpusToIbvNics(ibvLib, deviceList);\n  }\n\n  for (int gpuIdx = 0; gpuIdx < actualGpuIdxToNicName.size(); gpuIdx++) {\n    TP_VLOG(5) << \"CUDA GDR channel mapped GPU #\" << gpuIdx\n               << \" to InfiniBand NIC \" << actualGpuIdxToNicName[gpuIdx];\n  }\n\n  std::unordered_set<std::string> nicNames;\n  for (const auto& nicName : actualGpuIdxToNicName) {\n    nicNames.insert(nicName);\n  }\n\n  std::unordered_map<std::string, size_t> nicNameToNicIdx;\n  // The device index is among all available devices, the NIC index is among the\n  // ones we will use.\n  size_t nicIdx = 0;\n  for (size_t deviceIdx = 0; deviceIdx < deviceList.size(); deviceIdx++) {\n    IbvLib::device& device = deviceList[deviceIdx];\n    std::string deviceName(TP_CHECK_IBV_PTR(ibvLib.get_device_name(&device)));\n    auto iter = nicNames.find(deviceName);\n    if (iter != nicNames.end()) {\n      TP_VLOG(5) << \"CUDA GDR channel is using 
InfiniBand NIC \" << deviceName\n                 << \" as device #\" << nicIdx;\n      ibvNics_.emplace_back(*iter, device, ibvLib_, cudaLib_);\n      nicNameToNicIdx[*iter] = nicIdx;\n      nicIdx++;\n      nicNames.erase(iter);\n    }\n  }\n  TP_THROW_ASSERT_IF(!nicNames.empty())\n      << \"Couldn't find all the devices I was supposed to use\";\n\n  for (size_t gpuIdx = 0; gpuIdx < actualGpuIdxToNicName.size(); gpuIdx++) {\n    gpuToNic_.push_back(nicNameToNicIdx[actualGpuIdxToNicName[gpuIdx]]);\n  }\n\n  startThread(\"TP_CUDA_GDR_loop\");\n}\n\nconst CudaLib& ContextImpl::getCudaLib() {\n  return cudaLib_;\n}\n\nconst std::vector<size_t>& ContextImpl::getGpuToNicMapping() {\n  return gpuToNic_;\n}\n\nconst IbvLib& ContextImpl::getIbvLib() {\n  return ibvLib_;\n}\n\nIbvNic& ContextImpl::getIbvNic(size_t nicIdx) {\n  TP_DCHECK_LT(nicIdx, ibvNics_.size());\n  return ibvNics_[nicIdx];\n}\n\nbool ContextImpl::pollOnce() {\n  for (IbvNic& ibvNic : ibvNics_) {\n    if (ibvNic.pollOnce()) {\n      return true;\n    }\n  }\n  return pollCudaOnce();\n}\n\nbool ContextImpl::pollCudaOnce() {\n  bool any = false;\n  for (auto iter = pendingCudaEvents_.begin(); iter != pendingCudaEvents_.end();\n       iter++) {\n    const CudaEvent& event = std::get<0>(*iter);\n\n    if (event.query()) {\n      std::function<void(const Error&)> cb = std::move(std::get<1>(*iter));\n      cb(Error::kSuccess);\n      iter = pendingCudaEvents_.erase(iter);\n      any = true;\n    }\n  }\n  return any;\n}\n\nvoid ContextImpl::waitForCudaEvent(\n    const CudaEvent& event,\n    std::function<void(const Error&)> cb) {\n  deferToLoop([this, &event, cb{std::move(cb)}]() mutable {\n    waitForCudaEventFromLoop(event, std::move(cb));\n  });\n}\n\nvoid ContextImpl::waitForCudaEventFromLoop(\n    const CudaEvent& event,\n    std::function<void(const Error&)> cb) {\n  TP_DCHECK(inLoop());\n\n  pendingCudaEvents_.emplace_back(event, std::move(cb));\n}\n\nbool ContextImpl::readyToClose() {\n  for (const 
IbvNic& ibvNic : ibvNics_) {\n    if (!ibvNic.readyToClose()) {\n      return false;\n    }\n  }\n  return pendingCudaEvents_.empty();\n}\n\nvoid ContextImpl::handleErrorImpl() {\n  stopBusyPolling();\n}\n\nvoid ContextImpl::joinImpl() {\n  joinThread();\n\n  // FIXME It would be nice if this could be done by the thread itself just\n  // before it returns, rather than by the user.\n  ibvNics_.clear();\n}\n\nvoid ContextImpl::setIdImpl() {\n  for (IbvNic& ibvNic : ibvNics_) {\n    ibvNic.setId(id_);\n  }\n}\n\nstd::shared_ptr<Channel> ContextImpl::createChannel(\n    std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint /* unused */) {\n  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());\n  return createChannelInternal(\n      std::move(connections[0]), std::move(connections[1]));\n}\n\nsize_t ContextImpl::numConnectionsNeeded() const {\n  return 2;\n}\n\n} // namespace cuda_gdr\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <deque>\n#include <functional>\n#include <list>\n#include <map>\n#include <memory>\n#include <string>\n#include <tuple>\n#include <unordered_map>\n#include <vector>\n\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n#include <tensorpipe/channel/cuda_gdr/constants.h>\n#include <tensorpipe/common/busy_polling_loop.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/cuda_lib.h>\n#include <tensorpipe/common/device.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/ibv.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_gdr {\n\nclass ChannelImpl;\n\nclass IbvNic {\n public:\n  IbvNic(\n      std::string name,\n      IbvLib::device& device,\n      const IbvLib& ibvLib,\n      const CudaLib& cudaLib);\n\n  IbvProtectionDomain& getIbvPd() {\n    return pd_;\n  }\n\n  IbvCompletionQueue& getIbvCq() {\n    return cq_;\n  }\n\n  const IbvAddress& getIbvAddress() {\n    return addr_;\n  }\n\n  struct SendInfo {\n    void* addr;\n    size_t length;\n    uint32_t lkey;\n  };\n\n  void postSend(\n      IbvQueuePair& qp,\n      SendInfo info,\n      std::function<void(const Error&)> cb);\n\n  struct RecvInfo {\n    void* addr;\n    size_t length;\n    uint32_t lkey;\n  };\n\n  void postRecv(\n      IbvQueuePair& qp,\n      RecvInfo info,\n      std::function<void(const Error&)> cb);\n\n  bool pollOnce();\n\n  IbvMemoryRegion& registerMemory(CudaBuffer buffer);\n\n  bool readyToClose() const;\n\n  void setId(std::string id);\n\n private:\n  // The ID of the context, for use in verbose logging.\n  std::string id_{\"N/A\"};\n  // The 
name of the InfiniBand device.\n  const std::string name_;\n\n  const CudaLib& cudaLib_;\n\n  const IbvLib& ibvLib_;\n  IbvContext ctx_;\n  IbvProtectionDomain pd_;\n  IbvCompletionQueue cq_;\n  IbvAddress addr_;\n\n  size_t numAvailableRecvSlots_ = kNumRecvs;\n  std::deque<\n      std::tuple<IbvQueuePair&, RecvInfo, std::function<void(const Error&)>>>\n      recvsWaitingForSlots_;\n\n  size_t numAvailableSendSlots_ = kNumSends;\n  std::deque<\n      std::tuple<IbvQueuePair&, SendInfo, std::function<void(const Error&)>>>\n      sendsWaitingForSlots_;\n\n  // We need one common map for both send and recv requests because in principle\n  // we cannot access the opcode of a failed operation, meaning we couldn't\n  // match it to its callback. However, we could group them by QP number or, in\n  // fact, we could have the QP store these requests and we just wake it up when\n  // a completion occurs.\n  std::unordered_map<\n      uint64_t,\n      std::tuple<IbvLib::wc_opcode, std::function<void(const Error&)>>>\n      requestsInFlight_;\n  uint64_t nextRequestId_ = 0;\n\n  // The ibverbs memory regions are indexed by the CUDA driver's buffer ID for\n  // the GPU allocation, which is unique (within the process) and never reused.\n  // This will prevent us from re-using the memory region if a buffer gets\n  // deallocated and reallocated (although we will not clean up the old memory\n  // region until we close the context).\n  std::map<unsigned long long, IbvMemoryRegion> memoryRegions_;\n};\n\nclass ContextImpl final\n    : public BusyPollingLoop,\n      public ContextImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create(\n      optional<std::vector<std::string>> gpuIdxToNicName = nullopt);\n\n  ContextImpl(\n      std::unordered_map<Device, std::string> deviceDescriptors,\n      CudaLib cudaLib,\n      IbvLib ibvLib,\n      IbvDeviceList deviceList,\n      optional<std::vector<std::string>> gpuIdxToNicName);\n\n  
std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint);\n\n  size_t numConnectionsNeeded() const override;\n\n  const CudaLib& getCudaLib();\n\n  const std::vector<size_t>& getGpuToNicMapping();\n\n  const IbvLib& getIbvLib();\n\n  IbvNic& getIbvNic(size_t nicIdx);\n\n  void waitForCudaEvent(\n      const CudaEvent& event,\n      std::function<void(const Error&)> cb);\n\n protected:\n  // Implement BusyPollingLoop hooks.\n  bool pollOnce() override;\n  bool readyToClose() override;\n\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n  void setIdImpl() override;\n\n private:\n  const CudaLib cudaLib_;\n  const IbvLib ibvLib_;\n\n  std::vector<IbvNic> ibvNics_;\n  std::vector<size_t> gpuToNic_;\n\n  std::list<std::tuple<const CudaEvent&, std::function<void(const Error&)>>>\n      pendingCudaEvents_;\n\n  bool pollCudaOnce();\n\n  void waitForCudaEventFromLoop(\n      const CudaEvent& event,\n      std::function<void(const Error&)> cb);\n};\n\n} // namespace cuda_gdr\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/error.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n\n#include <tensorpipe/channel/error.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_gdr {\n\nclass IbvError final : public BaseError {\n public:\n  explicit IbvError(std::string error) : error_(error) {}\n\n  std::string what() const override {\n    return error_;\n  }\n\n private:\n  std::string error_;\n};\n\n} // namespace cuda_gdr\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_gdr/factory.h>\n\n#include <tensorpipe/channel/context_boilerplate.h>\n#include <tensorpipe/channel/cuda_gdr/channel_impl.h>\n#include <tensorpipe/channel/cuda_gdr/context_impl.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_gdr {\n\nstd::shared_ptr<Context> create(\n    optional<std::vector<std::string>> gpuIdxToNicName) {\n  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>(\n      std::move(gpuIdxToNicName));\n}\n\n} // namespace cuda_gdr\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_gdr/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <vector>\n\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_gdr {\n\nstd::shared_ptr<Context> create(\n    optional<std::vector<std::string>> gpuIdxToNicName = nullopt);\n\n} // namespace cuda_gdr\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/channel_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_ipc/channel_impl.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <cuda.h>\n#include <cuda_runtime.h>\n#include <nop/serializer.h>\n#include <nop/structure.h>\n#include <nop/types/variant.h>\n\n#include <tensorpipe/channel/cuda_ipc/constants.h>\n#include <tensorpipe/channel/cuda_ipc/context_impl.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_ipc {\n\nNOP_EXTERNAL_STRUCTURE(\n    ContextImpl::OutboxInfo,\n    processIdentifier,\n    memHandle,\n    eventHandles);\n\nnamespace {\n\nsize_t ceilOfRatio(size_t n, size_t d) {\n  return (n + d - 1) / d;\n}\n\nstruct Descriptor {\n  int deviceIdx;\n  size_t slotIdx;\n  nop::Optional<ContextImpl::OutboxInfo> outboxInfo;\n  NOP_STRUCTURE(Descriptor, deviceIdx, slotIdx, outboxInfo);\n};\n\n} // namespace\n\nChunkSendOperation::ChunkSendOperation(\n    uint64_t bufferSequenceNumber,\n    size_t chunkId,\n    size_t numChunks,\n    TSendCallback callback,\n    int deviceIdx,\n    const void* ptr,\n    size_t length,\n    cudaStream_t stream)\n    : bufferSequenceNumber(bufferSequenceNumber),\n      chunkId(chunkId),\n      numChunks(numChunks),\n      ptr(ptr),\n      length(length),\n      deviceIdx(deviceIdx),\n      stream(stream),\n      callback(std::move(callback)) {}\n\nChunkRecvOperation::ChunkRecvOperation(\n    uint64_t bufferSequenceNumber,\n    size_t chunkId,\n    size_t numChunks,\n    TRecvCallback callback,\n    int deviceIdx,\n    void* ptr,\n    size_t length,\n    cudaStream_t stream)\n    : 
bufferSequenceNumber(bufferSequenceNumber),\n      chunkId(chunkId),\n      numChunks(numChunks),\n      ptr(ptr),\n      length(length),\n      deviceIdx(deviceIdx),\n      stream(stream),\n      callback(std::move(callback)) {}\n\nChannelImpl::ChannelImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::shared_ptr<transport::Connection> descriptorConnection,\n    std::shared_ptr<transport::Connection> replyConnection)\n    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      descriptorConnection_(std::move(descriptorConnection)),\n      replyConnection_(std::move(replyConnection)) {}\n\nvoid ChannelImpl::initImplFromLoop() {\n  context_->enroll(*this);\n}\n\nvoid ChannelImpl::sendImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  if (length == 0) {\n    callback(error_);\n    return;\n  }\n\n  int deviceIdx = cudaDeviceForPointer(\n      context_->getCudaLib(), buffer.unwrap<CudaBuffer>().ptr);\n  const size_t numChunks = ceilOfRatio(length, kSlotSize);\n\n  for (size_t chunkIdx = 0; chunkIdx < numChunks; chunkIdx += 1) {\n    size_t offset = chunkIdx * kSlotSize;\n    ChunkSendOpIter opIter = chunkSendOps_.emplaceBack(\n        nextChunkBeingSent_++,\n        sequenceNumber,\n        chunkIdx,\n        numChunks,\n        chunkIdx == numChunks - 1 ? 
std::move(callback) : nullptr,\n        deviceIdx,\n        reinterpret_cast<uint8_t*>(buffer.unwrap<CudaBuffer>().ptr) + offset,\n        std::min(length - offset, kSlotSize),\n        buffer.unwrap<CudaBuffer>().stream);\n\n    chunkSendOps_.advanceOperation(opIter);\n  }\n}\n\nvoid ChannelImpl::advanceChunkSendOperation(\n    ChunkSendOpIter opIter,\n    ChunkSendOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  ChunkSendOperation& op = *opIter;\n\n  // Needs to go after previous op invoked its callback because the last chunk\n  // in a series (that corresponds to one operation) must invoke its callback\n  // only when all chunks in the series are done.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::UNINITIALIZED,\n      /*to=*/ChunkSendOperation::FINISHED,\n      /*cond=*/error_ && prevOpState >= ChunkSendOperation::FINISHED,\n      /*actions=*/\n      {&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure later operations are not holding\n  // events while earlier ones are still blocked waiting for them, because the\n  // events will only be returned after the control messages have been written\n  // and sent, and this won't happen for later operations until earlier ones\n  // have reached that stage too, and if those are blocked waiting for events\n  // then we may deadlock.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::UNINITIALIZED,\n      /*to=*/ChunkSendOperation::ALLOCATING_STAGING_BUFFER,\n      /*cond=*/!error_ &&\n          prevOpState >= ChunkSendOperation::ALLOCATING_STAGING_BUFFER,\n      /*actions=*/\n      {&ChannelImpl::allocateStagingBuffer});\n\n  // See above for why this needs to go after previous op.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/\n      ChunkSendOperation::ALLOCATING_STAGING_BUFFER,\n      /*to=*/ChunkSendOperation::FINISHED,\n      /*cond=*/error_ && 
op.doneAllocatingStagingBuffer &&\n          prevOpState >= ChunkSendOperation::FINISHED,\n      /*actions=*/\n      {&ChannelImpl::callSendCallback, &ChannelImpl::releaseStagingBuffer});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the descriptor control connection and read calls on the\n  // reply control connection.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/\n      ChunkSendOperation::ALLOCATING_STAGING_BUFFER,\n      /*to=*/ChunkSendOperation::READING_REPLY,\n      /*cond=*/!error_ && op.doneAllocatingStagingBuffer &&\n          prevOpState >= ChunkSendOperation::READING_REPLY,\n      /*actions=*/\n      {&ChannelImpl::copyFromSourceToStaging,\n       &ChannelImpl::writeDescriptor,\n       &ChannelImpl::readReply,\n       &ChannelImpl::callSendCallback});\n\n  // See above for why this needs to go after previous op.\n  chunkSendOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkSendOperation::READING_REPLY,\n      /*to=*/ChunkSendOperation::FINISHED,\n      /*cond=*/op.doneReadingReply &&\n          prevOpState >= ChunkSendOperation::FINISHED,\n      /*actions=*/\n      {&ChannelImpl::releaseStagingBuffer});\n}\n\nvoid ChannelImpl::allocateStagingBuffer(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  TP_VLOG(5) << \"Channel \" << id_\n             << \" is allocating temporary memory for chunk #\" << op.chunkId\n             << \" of \" << op.numChunks << \" for buffer #\"\n             << op.bufferSequenceNumber;\n  context_->allocateSlot(\n      op.deviceIdx,\n      op.length,\n      callbackWrapper_([opIter](\n                           ChannelImpl& impl,\n                           size_t slotIdx,\n                           Allocator::TChunk buffer,\n                           CudaEvent* event) {\n        TP_VLOG(5) << \"Channel \" << impl.id_\n                   << \" is done allocating temporary memory for chunk #\"\n                   << 
opIter->chunkId << \" of \" << opIter->numChunks\n                   << \" for buffer #\" << opIter->bufferSequenceNumber;\n        opIter->doneAllocatingStagingBuffer = true;\n        if (!impl.error_) {\n          opIter->slotIdx = slotIdx;\n          opIter->stagingBuffer = std::move(buffer);\n          opIter->event = event;\n        }\n        impl.chunkSendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::copyFromSourceToStaging(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  op.event->wait(op.stream, op.deviceIdx);\n  {\n    CudaDeviceGuard guard(op.deviceIdx);\n    TP_CUDA_CHECK(cudaMemcpyAsync(\n        op.stagingBuffer.get(),\n        op.ptr,\n        op.length,\n        cudaMemcpyDeviceToDevice,\n        op.stream));\n  }\n  op.event->record(op.stream);\n}\n\nvoid ChannelImpl::writeDescriptor(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  const CudaLib& cudaLib = context_->getCudaLib();\n\n  auto nopDescriptorHolder = std::make_shared<NopHolder<Descriptor>>();\n  Descriptor& nopDescriptor = nopDescriptorHolder->getObject();\n  nopDescriptor.deviceIdx = op.deviceIdx;\n  nopDescriptor.slotIdx = op.slotIdx;\n  if (localOutboxesSent_.size() <= op.deviceIdx) {\n    localOutboxesSent_.resize(op.deviceIdx + 1, false);\n  }\n  if (!localOutboxesSent_[op.deviceIdx]) {\n    localOutboxesSent_[op.deviceIdx] = true;\n    nopDescriptor.outboxInfo = context_->getLocalOutboxInfo(op.deviceIdx);\n  }\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing nop object (descriptor #\"\n             << op.sequenceNumber << \")\";\n  descriptorConnection_->write(\n      *nopDescriptorHolder,\n      callbackWrapper_([nopDescriptorHolder,\n                        sequenceNumber{op.sequenceNumber}](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done writing nop object (descriptor #\" << sequenceNumber\n                   << \")\";\n      }));\n}\n\nvoid 
ChannelImpl::readReply(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading nop object (reply #\"\n             << op.sequenceNumber << \")\";\n  replyConnection_->read(\n      nullptr,\n      0,\n      callbackWrapper_([opIter](\n                           ChannelImpl& impl,\n                           const void* /* unused */,\n                           size_t /* unused */) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done reading nop object (reply #\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingReply = true;\n        impl.chunkSendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::releaseStagingBuffer(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  op.stagingBuffer = nullptr;\n}\n\nvoid ChannelImpl::callSendCallback(ChunkSendOpIter opIter) {\n  ChunkSendOperation& op = *opIter;\n\n  if (op.callback) {\n    op.callback(error_);\n    // Reset callback to release the resources it was holding.\n    op.callback = nullptr;\n  }\n}\n\nvoid ChannelImpl::recvImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  if (length == 0) {\n    callback(error_);\n    return;\n  }\n\n  int deviceIdx = cudaDeviceForPointer(\n      context_->getCudaLib(), buffer.unwrap<CudaBuffer>().ptr);\n  const size_t numChunks = ceilOfRatio(length, kSlotSize);\n\n  for (size_t chunkIdx = 0; chunkIdx < numChunks; chunkIdx += 1) {\n    size_t offset = chunkIdx * kSlotSize;\n    ChunkRecvOpIter opIter = chunkRecvOps_.emplaceBack(\n        nextChunkBeingReceived_++,\n        sequenceNumber,\n        chunkIdx,\n        numChunks,\n        chunkIdx == numChunks - 1 ? 
std::move(callback) : nullptr,\n        deviceIdx,\n        reinterpret_cast<uint8_t*>(buffer.unwrap<CudaBuffer>().ptr) + offset,\n        std::min(length - offset, kSlotSize),\n        buffer.unwrap<CudaBuffer>().stream);\n\n    chunkRecvOps_.advanceOperation(opIter);\n  }\n}\n\nvoid ChannelImpl::advanceChunkRecvOperation(\n    ChunkRecvOpIter opIter,\n    ChunkRecvOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  ChunkRecvOperation& op = *opIter;\n\n  // Needs to go after previous op invoked its callback because the last chunk\n  // in a series (that corresponds to one operation) must invoke its callback\n  // only when all chunks in the series are done.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::UNINITIALIZED,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/error_ && prevOpState >= ChunkRecvOperation::FINISHED,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of read calls on descriptor control connection.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::UNINITIALIZED,\n      /*to=*/ChunkRecvOperation::READING_DESCRIPTOR,\n      /*cond=*/!error_ && prevOpState >= ChunkRecvOperation::READING_DESCRIPTOR,\n      /*actions=*/{&ChannelImpl::readDescriptor});\n\n  // See above for why this needs to go after previous op.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      /*from=*/ChunkRecvOperation::READING_DESCRIPTOR,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneReadingDescriptor &&\n          prevOpState >= ChunkRecvOperation::FINISHED,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on reply control connection.\n  chunkRecvOps_.attemptTransition(\n      opIter,\n      
/*from=*/ChunkRecvOperation::READING_DESCRIPTOR,\n      /*to=*/ChunkRecvOperation::FINISHED,\n      /*cond=*/!error_ && op.doneReadingDescriptor &&\n          prevOpState >= ChunkRecvOperation::FINISHED,\n      /*actions=*/\n      {&ChannelImpl::copyFromStagingToTarget,\n       &ChannelImpl::writeReply,\n       &ChannelImpl::callRecvCallback});\n}\n\nvoid ChannelImpl::readDescriptor(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading nop object (descriptor #\"\n             << op.sequenceNumber << \")\";\n  auto nopDescriptorHolder = std::make_shared<NopHolder<Descriptor>>();\n  descriptorConnection_->read(\n      *nopDescriptorHolder,\n      callbackWrapper_([opIter, nopDescriptorHolder](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done reading nop object (descriptor #\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingDescriptor = true;\n        if (!impl.error_) {\n          Descriptor& nopDescriptor = nopDescriptorHolder->getObject();\n          opIter->remoteDeviceIdx = nopDescriptor.deviceIdx;\n          opIter->remoteSlotIdx = nopDescriptor.slotIdx;\n          if (!nopDescriptor.outboxInfo.empty()) {\n            if (impl.remoteOutboxesReceived_.size() <=\n                opIter->remoteDeviceIdx) {\n              impl.remoteOutboxesReceived_.resize(opIter->remoteDeviceIdx + 1);\n            }\n            TP_DCHECK(!impl.remoteOutboxesReceived_[opIter->remoteDeviceIdx]\n                           .has_value());\n            impl.remoteOutboxesReceived_[opIter->remoteDeviceIdx] =\n                std::move(nopDescriptor.outboxInfo.take());\n          }\n        }\n        impl.chunkRecvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::copyFromStagingToTarget(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  if (remoteOutboxesOpened_.size() <= op.remoteDeviceIdx) {\n    
remoteOutboxesOpened_.resize(op.remoteDeviceIdx + 1);\n  }\n  if (remoteOutboxesOpened_[op.remoteDeviceIdx].size() <= op.deviceIdx) {\n    remoteOutboxesOpened_[op.remoteDeviceIdx].resize(op.deviceIdx + 1, nullptr);\n  }\n  if (remoteOutboxesOpened_[op.remoteDeviceIdx][op.deviceIdx] == nullptr) {\n    remoteOutboxesOpened_[op.remoteDeviceIdx][op.deviceIdx] =\n        &context_->openRemoteOutbox(\n            op.deviceIdx,\n            op.remoteDeviceIdx,\n            remoteOutboxesReceived_[op.remoteDeviceIdx].value());\n  }\n  const ContextImpl::RemoteOutboxHandle& outbox =\n      *remoteOutboxesOpened_[op.remoteDeviceIdx][op.deviceIdx];\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is copying payload (#\"\n             << op.sequenceNumber << \")\";\n\n  outbox.events[op.remoteSlotIdx]->wait(op.stream, op.deviceIdx);\n  {\n    CudaDeviceGuard guard(op.deviceIdx);\n    TP_CUDA_CHECK(cudaMemcpyAsync(\n        op.ptr,\n        outbox.buffer.ptr() + kSlotSize * op.remoteSlotIdx,\n        op.length,\n        cudaMemcpyDeviceToDevice,\n        op.stream));\n  }\n  outbox.events[op.remoteSlotIdx]->record(op.stream);\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" done copying payload (#\"\n             << op.sequenceNumber << \")\";\n}\n\nvoid ChannelImpl::callRecvCallback(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  if (op.callback) {\n    op.callback(error_);\n    // Reset callback to release the resources it was holding.\n    op.callback = nullptr;\n  }\n}\n\nvoid ChannelImpl::writeReply(ChunkRecvOpIter opIter) {\n  ChunkRecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing reply notification (#\"\n             << op.sequenceNumber << \")\";\n  replyConnection_->write(\n      nullptr,\n      0,\n      callbackWrapper_([sequenceNumber{op.sequenceNumber}](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_\n                   << \" done writing reply notification (#\" << sequenceNumber\n                 
  << \")\";\n      }));\n}\n\nvoid ChannelImpl::handleErrorImpl() {\n  chunkSendOps_.advanceAllOperations();\n  chunkRecvOps_.advanceAllOperations();\n\n  descriptorConnection_->close();\n  replyConnection_->close();\n\n  context_->unenroll(*this);\n}\n\n} // namespace cuda_ipc\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/channel_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <list>\n#include <memory>\n#include <string>\n\n#include <cuda_runtime.h>\n\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n#include <tensorpipe/channel/cuda_ipc/context_impl.h>\n#include <tensorpipe/common/allocator.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_lib.h>\n#include <tensorpipe/common/state_machine.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_ipc {\n\nclass ContextImpl;\n\nstruct ChunkSendOperation {\n  enum State {\n    UNINITIALIZED,\n    ALLOCATING_STAGING_BUFFER,\n    READING_REPLY,\n    FINISHED\n  };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneAllocatingStagingBuffer{false};\n  bool doneReadingReply{false};\n\n  // Arguments at creation\n  const uint64_t bufferSequenceNumber;\n  const size_t chunkId;\n  const size_t numChunks;\n  const void* const ptr;\n  const size_t length;\n  const int deviceIdx;\n  const cudaStream_t stream;\n  TSendCallback callback;\n\n  // Other data\n  size_t slotIdx{static_cast<size_t>(-1)};\n  Allocator::TChunk stagingBuffer;\n  CudaEvent* event{nullptr};\n\n  ChunkSendOperation(\n      uint64_t bufferSequenceNumber,\n      size_t chunkId,\n      size_t numChunks,\n      TSendCallback callback,\n      int deviceIdx,\n      const void* ptr,\n      size_t length,\n      cudaStream_t stream);\n};\n\nstruct ChunkRecvOperation {\n  enum State { UNINITIALIZED, READING_DESCRIPTOR, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingDescriptor{false};\n  bool 
doneRequestingEvent{false};\n  bool doneReadingAck{false};\n\n  // Arguments at creation\n  const uint64_t bufferSequenceNumber;\n  const size_t chunkId;\n  const size_t numChunks;\n  void* const ptr;\n  const size_t length;\n  const int deviceIdx;\n  const cudaStream_t stream;\n  TRecvCallback callback;\n\n  // Other data\n  int remoteDeviceIdx;\n  size_t remoteSlotIdx;\n\n  ChunkRecvOperation(\n      uint64_t bufferSequenceNumber,\n      size_t chunkId,\n      size_t numChunks,\n      TRecvCallback callback,\n      int deviceIdx,\n      void* ptr,\n      size_t length,\n      cudaStream_t stream);\n};\n\nclass ChannelImpl final\n    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  ChannelImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::shared_ptr<transport::Connection> descriptorConnection,\n      std::shared_ptr<transport::Connection> replyConnection);\n\n protected:\n  // Implement the entry points called by ChannelImplBoilerplate.\n  void initImplFromLoop() override;\n  void sendImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) override;\n  void recvImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TRecvCallback callback) override;\n  void handleErrorImpl() override;\n\n private:\n  const std::shared_ptr<transport::Connection> descriptorConnection_;\n  const std::shared_ptr<transport::Connection> replyConnection_;\n\n  // For each local device, whether we've already sent the information about the\n  // device's outbox to the remote, who needs it to open a handle to the outbox.\n  // Used during the send path.\n  std::vector<bool> localOutboxesSent_;\n\n  // For each remote device, the information about the remote's outbox for that\n  // device (or nullopt, if we haven't received it yet). 
We store it because we\n  // will only receive it once (for the first buffer coming from that device)\n  // but we might need it multiple times, as we need to open it for every local\n  // target device where it might be needed. Used during the receive path.\n  std::vector<optional<ContextImpl::OutboxInfo>> remoteOutboxesReceived_;\n  // For each remote and local device, the handle to the opened remote outbox\n  // for that device (or nullptr if we haven't opened it yet). Used during the\n  // receive path.\n  std::vector<std::vector<const ContextImpl::RemoteOutboxHandle*>>\n      remoteOutboxesOpened_;\n\n  // A sequence number for the chunks.\n  uint64_t nextChunkBeingSent_{0};\n  uint64_t nextChunkBeingReceived_{0};\n\n  OpsStateMachine<ChannelImpl, ChunkSendOperation> chunkSendOps_{\n      *this,\n      &ChannelImpl::advanceChunkSendOperation};\n  using ChunkSendOpIter = decltype(chunkSendOps_)::Iter;\n  OpsStateMachine<ChannelImpl, ChunkRecvOperation> chunkRecvOps_{\n      *this,\n      &ChannelImpl::advanceChunkRecvOperation};\n  using ChunkRecvOpIter = decltype(chunkRecvOps_)::Iter;\n\n  // State machines for send and recv ops.\n  void advanceChunkSendOperation(\n      ChunkSendOpIter opIter,\n      ChunkSendOperation::State prevOpState);\n  void advanceChunkRecvOperation(\n      ChunkRecvOpIter opIter,\n      ChunkRecvOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For send operations:\n  void allocateStagingBuffer(ChunkSendOpIter opIter);\n  void copyFromSourceToStaging(ChunkSendOpIter opIter);\n  void writeDescriptor(ChunkSendOpIter opIter);\n  void readReply(ChunkSendOpIter opIter);\n  void releaseStagingBuffer(ChunkSendOpIter opIter);\n  void callSendCallback(ChunkSendOpIter opIter);\n  // For recv operations:\n  void readDescriptor(ChunkRecvOpIter opIter);\n  void copyFromStagingToTarget(ChunkRecvOpIter opIter);\n  void callRecvCallback(ChunkRecvOpIter opIter);\n  void writeReply(ChunkRecvOpIter 
opIter);\n};\n\n} // namespace cuda_ipc\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/constants.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_ipc {\n\n// FIXME Avoid this anonymous namespace and use inline variables in C++17.\nnamespace {\n\n// Define all three (redundant) values to make them explicit and avoid\n// misunderstandings due to miscalculations.\nstatic constexpr size_t kStagingAreaSize = 32 * 1024 * 1024;\nstatic constexpr size_t kSlotSize = 8 * 1024 * 1024;\nstatic constexpr size_t kNumSlots = 4;\n\nstatic_assert(kStagingAreaSize == kSlotSize * kNumSlots, \"\");\n\n} // namespace\n\n} // namespace cuda_ipc\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_ipc/context_impl.h>\n\n#include <algorithm>\n#include <array>\n#include <functional>\n#include <iomanip>\n#include <ios>\n#include <memory>\n#include <sstream>\n#include <string>\n#include <tuple>\n#include <utility>\n#include <vector>\n\n#include <unistd.h>\n\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\n#include <tensorpipe/channel/cuda_ipc/channel_impl.h>\n#include <tensorpipe/channel/cuda_ipc/constants.h>\n#include <tensorpipe/channel/helpers.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/nop.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/strings.h>\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_ipc {\n\nnamespace {\n\nstd::tuple<std::vector<std::string>, std::vector<std::vector<bool>>>\ngetGlobalUuidsAndP2pSupport(const NvmlLib& nvmlLib) {\n  unsigned int numDevices;\n  TP_NVML_CHECK(nvmlLib, nvmlLib.deviceGetCount_v2(&numDevices));\n\n  std::vector<nvmlDevice_t> devices(numDevices);\n  std::vector<std::string> uuids(numDevices);\n  for (unsigned int devIdx = 0; devIdx < numDevices; devIdx++) {\n    TP_NVML_CHECK(\n        nvmlLib, nvmlLib.deviceGetHandleByIndex_v2(devIdx, &devices[devIdx]));\n\n    // NVML_DEVICE_UUID_V2_BUFFER_SIZE was introduced in CUDA 11.0.\n#ifdef NVML_DEVICE_UUID_V2_BUFFER_SIZE\n    std::array<char, NVML_DEVICE_UUID_V2_BUFFER_SIZE> uuid;\n#else\n    std::array<char, NVML_DEVICE_UUID_BUFFER_SIZE> uuid;\n#endif\n    TP_NVML_CHECK(\n        nvmlLib,\n        nvmlLib.deviceGetUUID(devices[devIdx], uuid.data(), uuid.size()));\n    std::string uuidStr(uuid.data());\n    TP_THROW_ASSERT_IF(uuidStr.substr(0, 4) != 
\"GPU-\")\n        << \"Couldn't obtain valid UUID for GPU #\" << devIdx\n        << \" from CUDA driver. Got: \" << uuidStr;\n    uuidStr = uuidStr.substr(4);\n    TP_THROW_ASSERT_IF(!isValidUuid(uuidStr))\n        << \"Couldn't obtain valid UUID for GPU #\" << devIdx\n        << \" from NVML. Got: \" << uuidStr;\n    uuids[devIdx] = std::move(uuidStr);\n  }\n\n  std::vector<std::vector<bool>> p2pSupport(numDevices);\n  for (int devIdx = 0; devIdx < numDevices; devIdx++) {\n    p2pSupport[devIdx].resize(numDevices);\n    for (int otherDevIdx = 0; otherDevIdx < numDevices; otherDevIdx++) {\n      if (devIdx == otherDevIdx) {\n        p2pSupport[devIdx][otherDevIdx] = true;\n        continue;\n      }\n      nvmlGpuP2PStatus_t p2pStatus;\n      TP_NVML_CHECK(\n          nvmlLib,\n          nvmlLib.deviceGetP2PStatus(\n              devices[devIdx],\n              devices[otherDevIdx],\n              NVML_P2P_CAPS_INDEX_READ,\n              &p2pStatus));\n      p2pSupport[devIdx][otherDevIdx] = (p2pStatus == NVML_P2P_STATUS_OK);\n    }\n  }\n\n  return std::make_tuple(std::move(uuids), std::move(p2pSupport));\n}\n\nint globalIdxForDevice(\n    const std::vector<std::string>& globalUuids,\n    const std::string& uuid) {\n  auto iter = std::find(globalUuids.begin(), globalUuids.end(), uuid);\n  TP_THROW_ASSERT_IF(iter == globalUuids.end())\n      << \"Couldn't find GPU with UUID \" << uuid;\n\n  return iter - globalUuids.begin();\n}\n\nstruct DeviceDescriptor {\n  std::string bootId;\n  int64_t pid;\n  std::string deviceUuid;\n  NOP_STRUCTURE(DeviceDescriptor, bootId, pid, deviceUuid);\n};\n\nDeviceDescriptor deserializeDeviceDescriptor(\n    const std::string& deviceDescriptor) {\n  NopHolder<DeviceDescriptor> nopHolder;\n  loadDescriptor(nopHolder, deviceDescriptor);\n  return std::move(nopHolder.getObject());\n}\n\nstd::string generateBootId() {\n  auto bootID = getBootID();\n  TP_THROW_ASSERT_IF(!bootID) << \"Unable to read boot_id\";\n  return 
bootID.value();\n}\n\n// FIXME We'd want this to return a std::vector<CudaEvent>, but CudaEvents\n// aren't default-constructible nor movable. Hence either we make them such,\n// or we use some pointer magic (like placement new). For now, we work around\n// this by using a unique_ptr and wrapping them in optional<>, but it's silly.\nstd::unique_ptr<optional<CudaEvent>[]> createIpcEventArray(\n    int deviceIdx,\n    size_t numEvents) {\n  auto events = std::make_unique<optional<CudaEvent>[]>(numEvents);\n  // The CUDA driver has a bug where creating and/or destroying IPC events\n  // sometimes causes a deadlock (it's unclear which of the two steps is the\n  // cause). The deadlock tends to manifest as a cudaStreamSynchronize call\n  // never returning. Just to be safe, and to catch such a deadlock early and\n  // clearly, let's add extra syncs here. (The bug is fixed in v460).\n  {\n    CudaDeviceGuard guard(deviceIdx);\n    TP_CUDA_CHECK(cudaDeviceSynchronize());\n  }\n  for (size_t idx = 0; idx < numEvents; idx++) {\n    events[idx].emplace(deviceIdx, true);\n    // One day we might get tempted to have CudaEvent lazily initialize its\n    // cudaEvent_t, just like PyTorch does. However here we explicitly want to\n    // eagerly initialize IPC events, as creating them late might deadlock with\n    // old CUDA driver versions. 
This check should hopefully catch if the event\n    // is lazy-initialized.\n    TP_THROW_ASSERT_IF(events[idx]->raw() == nullptr);\n  }\n  {\n    CudaDeviceGuard guard(deviceIdx);\n    TP_CUDA_CHECK(cudaDeviceSynchronize());\n  }\n  return events;\n}\n\nstd::vector<cudaIpcEventHandle_t> getIpcHandlesForEventArray(\n    optional<CudaEvent> events[],\n    size_t numEvents) {\n  std::vector<cudaIpcEventHandle_t> eventHandles(numEvents);\n  for (size_t idx = 0; idx < numEvents; idx++) {\n    eventHandles[idx] = events[idx]->getIpcHandle();\n  }\n  return eventHandles;\n}\n\n} // namespace\n\nContextImpl::Outbox::Outbox(int deviceIdx)\n    : buffer(kStagingAreaSize, deviceIdx),\n      events(createIpcEventArray(deviceIdx, kNumSlots)),\n      handle(this->buffer.getIpcHandle()),\n      eventHandles(getIpcHandlesForEventArray(this->events.get(), kNumSlots)),\n      allocator(this->buffer.ptr(), kNumSlots, kSlotSize) {}\n\nContextImpl::Outbox::~Outbox() {\n  // The CUDA driver has a bug where creating and/or destroying IPC events\n  // sometimes causes a deadlock (it's unclear which of the two steps is the\n  // cause). The deadlock tends to manifest as a cudaStreamSynchronize call\n  // never returning. Just to be safe, and to catch such a deadlock early and\n  // clearly, let's add extra syncs here. 
 (The bug is fixed in v460).\n  {\n    CudaDeviceGuard guard(buffer.deviceIdx());\n    TP_CUDA_CHECK(cudaDeviceSynchronize());\n  }\n  events.reset();\n  {\n    CudaDeviceGuard guard(buffer.deviceIdx());\n    TP_CUDA_CHECK(cudaDeviceSynchronize());\n  }\n}\n\nstd::shared_ptr<ContextImpl> ContextImpl::create() {\n  Error error;\n  CudaLib cudaLib;\n  std::tie(error, cudaLib) = CudaLib::create();\n  if (error) {\n    TP_VLOG(5)\n        << \"CUDA IPC channel is not viable because libcuda could not be loaded: \"\n        << error.what();\n    return nullptr;\n  }\n\n  NvmlLib nvmlLib;\n  std::tie(error, nvmlLib) = NvmlLib::create();\n  if (error) {\n    TP_VLOG(5)\n        << \"CUDA IPC channel is not viable because libnvidia-ml could not be loaded: \"\n        << error.what();\n    return nullptr;\n  }\n\n  const std::string bootId = generateBootId();\n  const pid_t pid = ::getpid();\n\n  std::unordered_map<Device, std::string> deviceDescriptors;\n  for (const auto& device : getCudaDevices(cudaLib)) {\n    // This part is largely inspired from\n    // https://github.com/NVIDIA/cuda-samples/blob/master/Samples/simpleIPC/simpleIPC.cu.\n    cudaDeviceProp props;\n    TP_CUDA_CHECK(cudaGetDeviceProperties(&props, device.index));\n\n    // Unified addressing is required for IPC.\n    if (!props.unifiedAddressing) {\n      TP_VLOG(4) << \"CUDA IPC channel is not viable because CUDA device \"\n                 << device.index << \" does not have unified addressing\";\n      return nullptr;\n    }\n\n    // The other two compute modes are \"exclusive\" and \"prohibited\", both of\n    // which prevent access from another process.\n    int computeMode = -1;\n    TP_CUDA_CHECK(cudaDeviceGetAttribute(&computeMode, cudaDevAttrComputeMode, device.index));\n    if (computeMode != cudaComputeModeDefault) {\n      TP_VLOG(4) << \"CUDA IPC channel is not viable because CUDA device \"\n                 << device.index << \" is not in default compute mode\";\n      return nullptr;\n    
}\n\n    NopHolder<DeviceDescriptor> nopHolder;\n    DeviceDescriptor& deviceDescriptor = nopHolder.getObject();\n    deviceDescriptor.bootId = bootId;\n    deviceDescriptor.pid = static_cast<int64_t>(pid);\n    deviceDescriptor.deviceUuid = getUuidOfDevice(cudaLib, device.index);\n\n    deviceDescriptors[device] = saveDescriptor(nopHolder);\n  }\n\n  std::vector<std::string> globalUuids;\n  std::vector<std::vector<bool>> p2pSupport;\n  std::tie(globalUuids, p2pSupport) = getGlobalUuidsAndP2pSupport(nvmlLib);\n  TP_VLOG(4) << \"The UUIDs of all the GPUs found by the CUDA IPC channel are \"\n             << joinStrs(globalUuids);\n  TP_VLOG(4) << \"The peer-to-peer support found by the CUDA IPC channel is \"\n             << formatMatrix(p2pSupport);\n\n  std::ostringstream oss;\n  optional<std::string> nsId = getLinuxNamespaceId(LinuxNamespace::kPid);\n  if (!nsId.has_value()) {\n    TP_VLOG(4)\n        << \"CUDA IPC channel is not viable because it couldn't determine the PID namespace ID\";\n    return nullptr;\n  }\n  oss << nsId.value() << \"_\" << pid;\n  std::string processIdentifier = oss.str();\n\n  return std::make_shared<ContextImpl>(\n      std::move(deviceDescriptors),\n      std::move(cudaLib),\n      std::move(nvmlLib),\n      std::move(globalUuids),\n      std::move(p2pSupport),\n      std::move(processIdentifier));\n}\n\nContextImpl::ContextImpl(\n    std::unordered_map<Device, std::string> deviceDescriptors,\n    CudaLib cudaLib,\n    NvmlLib nvmlLib,\n    std::vector<std::string> globalUuids,\n    std::vector<std::vector<bool>> p2pSupport,\n    std::string processIdentifier)\n    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(\n          std::move(deviceDescriptors)),\n      cudaLib_(std::move(cudaLib)),\n      nvmlLib_(std::move(nvmlLib)),\n      globalUuids_(std::move(globalUuids)),\n      p2pSupport_(std::move(p2pSupport)),\n      processIdentifier_(processIdentifier) {}\n\nstd::shared_ptr<Channel> ContextImpl::createChannel(\n    
std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint /* unused */) {\n  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());\n  return createChannelInternal(\n      std::move(connections[0]), std::move(connections[1]));\n}\n\nsize_t ContextImpl::numConnectionsNeeded() const {\n  // The control connection needs to carry two unrelated streams in each\n  // direction (the descriptors and the replies), and it's thus simpler to just\n  // use two such connections.\n  return 2;\n}\n\nbool ContextImpl::canCommunicateWithRemote(\n    const std::string& localDeviceDescriptor,\n    const std::string& remoteDeviceDescriptor) const {\n  DeviceDescriptor nopLocalDeviceDescriptor =\n      deserializeDeviceDescriptor(localDeviceDescriptor);\n  DeviceDescriptor nopRemoteDeviceDescriptor =\n      deserializeDeviceDescriptor(remoteDeviceDescriptor);\n\n  if (nopLocalDeviceDescriptor.bootId != nopRemoteDeviceDescriptor.bootId) {\n    return false;\n  }\n\n  // Disable CudaIpc when both endpoints are in the same process, as a CUDA IPC\n  // handle cannot be opened in the same process in which it was created.\n  if (nopLocalDeviceDescriptor.pid == nopRemoteDeviceDescriptor.pid) {\n    return false;\n  }\n\n  int localGlobalIdx =\n      globalIdxForDevice(globalUuids_, nopLocalDeviceDescriptor.deviceUuid);\n  int remoteGlobalIdx =\n      globalIdxForDevice(globalUuids_, nopRemoteDeviceDescriptor.deviceUuid);\n\n  return p2pSupport_[localGlobalIdx][remoteGlobalIdx] &&\n      p2pSupport_[remoteGlobalIdx][localGlobalIdx];\n}\n\nconst CudaLib& ContextImpl::getCudaLib() {\n  return cudaLib_;\n}\n\nvoid ContextImpl::allocateSlot(\n    int deviceIdx,\n    size_t length,\n    SlotAllocCallback callback) {\n  if (outboxes_.size() <= deviceIdx) {\n    outboxes_.resize(deviceIdx + 1);\n  }\n  if (outboxes_[deviceIdx] == nullptr) {\n    outboxes_[deviceIdx] = std::make_unique<Outbox>(deviceIdx);\n  }\n\n  // We don't need to wrap this callback with the 
callbackWrapper_ because the\n  // callback that was passed to this method already is, and because all we're\n  // doing here is wrap that callback and do read-only accesses to the outbox.\n  Outbox& outbox = *outboxes_[deviceIdx];\n  outboxes_[deviceIdx]->allocator.alloc(\n      length,\n      [&outbox, callback{std::move(callback)}](\n          const Error& error, Allocator::TChunk chunk) {\n        if (error) {\n          callback(error, 0, std::move(chunk), nullptr);\n          return;\n        }\n        size_t slotIdx = (chunk.get() - outbox.buffer.ptr()) / kSlotSize;\n        callback(\n            error, slotIdx, std::move(chunk), &outbox.events[slotIdx].value());\n      });\n}\n\nContextImpl::OutboxInfo ContextImpl::getLocalOutboxInfo(int deviceIdx) {\n  TP_DCHECK(outboxes_.size() > deviceIdx);\n  TP_DCHECK(outboxes_[deviceIdx] != nullptr);\n  OutboxInfo info;\n  info.processIdentifier = processIdentifier_;\n  info.memHandle = std::string(\n      reinterpret_cast<const char*>(&outboxes_[deviceIdx]->handle),\n      sizeof(cudaIpcMemHandle_t));\n  info.eventHandles.reserve(kNumSlots);\n  for (size_t slotIdx = 0; slotIdx < kNumSlots; slotIdx++) {\n    info.eventHandles.emplace_back(\n        reinterpret_cast<const char*>(\n            &outboxes_[deviceIdx]->eventHandles[slotIdx]),\n        sizeof(cudaIpcEventHandle_t));\n  }\n  return info;\n}\n\nconst ContextImpl::RemoteOutboxHandle& ContextImpl::openRemoteOutbox(\n    int localDeviceIdx,\n    int remoteDeviceIdx,\n    OutboxInfo remoteOutboxInfo) {\n  RemoteOutboxKey key{\n      std::move(remoteOutboxInfo.processIdentifier),\n      remoteDeviceIdx,\n      localDeviceIdx};\n  decltype(remoteOutboxes_)::iterator iter;\n  bool didntExist;\n  std::tie(iter, didntExist) =\n      remoteOutboxes_.emplace(std::move(key), RemoteOutboxHandle{});\n  RemoteOutboxHandle& outbox = iter->second;\n\n  if (didntExist) {\n    CudaDeviceGuard guard(localDeviceIdx);\n    outbox.buffer = CudaIpcBuffer(\n        
localDeviceIdx,\n        *reinterpret_cast<const cudaIpcMemHandle_t*>(\n            remoteOutboxInfo.memHandle.data()));\n    outbox.events = std::make_unique<optional<CudaEvent>[]>(kNumSlots);\n    for (size_t slotIdx = 0; slotIdx < kNumSlots; slotIdx++) {\n      outbox.events[slotIdx].emplace(\n          localDeviceIdx,\n          *reinterpret_cast<const cudaIpcEventHandle_t*>(\n              remoteOutboxInfo.eventHandles[slotIdx].data()));\n    }\n  }\n\n  return outbox;\n}\n\nvoid ContextImpl::handleErrorImpl() {\n  for (std::unique_ptr<Outbox>& outbox : outboxes_) {\n    if (outbox != nullptr) {\n      outbox->allocator.close();\n    }\n  }\n}\n\nvoid ContextImpl::joinImpl() {}\n\nbool ContextImpl::inLoop() const {\n  return loop_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  loop_.deferToLoop(std::move(fn));\n};\n\n} // namespace cuda_ipc\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <map>\n#include <memory>\n#include <string>\n#include <vector>\n\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n#include <tensorpipe/common/allocator.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_lib.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/device.h>\n#include <tensorpipe/common/nvml_lib.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_ipc {\n\nclass ChannelImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create();\n\n  ContextImpl(\n      std::unordered_map<Device, std::string> deviceDescriptors,\n      CudaLib cudaLib,\n      NvmlLib nvmlLib,\n      std::vector<std::string> globalUuids,\n      std::vector<std::vector<bool>> p2pSupport,\n      std::string processIdentifier);\n\n  std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint);\n\n  size_t numConnectionsNeeded() const override;\n\n  bool canCommunicateWithRemote(\n      const std::string& localDeviceDescriptor,\n      const std::string& remoteDeviceDescriptor) const override;\n\n  const CudaLib& getCudaLib();\n\n  // Takes the index of the slot, the (smart) pointer to the slot, and the (raw)\n  // pointer to the event for the slot.\n  using SlotAllocCallback =\n      std::function<void(const Error&, size_t, Allocator::TChunk, CudaEvent*)>;\n  void allocateSlot(int deviceIdx, size_t length, SlotAllocCallback callback);\n\n  struct OutboxInfo {\n    std::string processIdentifier;\n    std::string memHandle;\n    std::vector<std::string> 
eventHandles;\n  };\n  OutboxInfo getLocalOutboxInfo(int deviceIdx);\n\n  struct RemoteOutboxHandle {\n    CudaIpcBuffer buffer;\n    std::unique_ptr<optional<CudaEvent>[]> events;\n  };\n  const RemoteOutboxHandle& openRemoteOutbox(\n      int localDeviceIdx,\n      int remoteDeviceIdx,\n      OutboxInfo remoteOutboxInfo);\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n\n private:\n  OnDemandDeferredExecutor loop_;\n\n  const CudaLib cudaLib_;\n  const NvmlLib nvmlLib_;\n\n  const std::vector<std::string> globalUuids_;\n  const std::vector<std::vector<bool>> p2pSupport_;\n\n  // A combination of the process's PID namespace and its PID, which combined\n  // with the device index allows us to uniquely identify each staging buffer on\n  // the current machine.\n  const std::string processIdentifier_;\n\n  // A CUDA on-device allocation that acts as the outbox for all the channels of\n  // this context. We cannot directly get and open IPC handles of the user's\n  // buffers, as this will fail if the user already opened such a handle (this\n  // limitation was lifted in CUDA 11.1). Moreover, since we \"leak\" the opened\n  // IPC handles (i.e., we leave them open, and close them all when the context\n  // closes), if we opened an IPC handle to a user buffer and the user freed\n  // that buffer we would prevent CUDA from really making that memory available\n  // again (this is an undocumented behavior which was observed experimentally).\n  // As a solution, we create our own allocation and get and open an IPC handle\n  // to that, as we can guarantee its lifetime and that no other IPC handle\n  // exists. 
We then use it as a staging ground for outgoing transfers, copying\n  // chunks to it from source buffers, and having the remote copy them to the\n  // target buffer.\n  struct Outbox {\n    const CudaDeviceBuffer buffer;\n    std::unique_ptr<optional<CudaEvent>[]> events;\n    const cudaIpcMemHandle_t handle;\n    const std::vector<cudaIpcEventHandle_t> eventHandles;\n    Allocator allocator;\n\n    explicit Outbox(int deviceIdx);\n    ~Outbox();\n  };\n  std::vector<std::unique_ptr<Outbox>> outboxes_;\n\n  struct RemoteOutboxKey {\n    std::string processIdentifier;\n    int remoteDeviceIdx;\n    int localDeviceIdx;\n\n    bool operator==(const RemoteOutboxKey& other) const noexcept {\n      return processIdentifier == other.processIdentifier &&\n          remoteDeviceIdx == other.remoteDeviceIdx &&\n          localDeviceIdx == other.localDeviceIdx;\n    }\n  };\n  struct RemoteOutboxKeyHash {\n    size_t operator()(const RemoteOutboxKey& key) const noexcept {\n      size_t h1 = std::hash<std::string>{}(key.processIdentifier);\n      size_t h2 = std::hash<int>{}(key.remoteDeviceIdx);\n      size_t h3 = std::hash<int>{}(key.localDeviceIdx);\n      // Byte-shift hashes in order to \"capture\" the order of members.\n      // FIXME Should we use a proper hash combiner? We can copy Boost's one.\n      return h1 ^ (h2 << 1) ^ (h3 << 2);\n    }\n  };\n  std::unordered_map<RemoteOutboxKey, RemoteOutboxHandle, RemoteOutboxKeyHash>\n      remoteOutboxes_;\n};\n\n} // namespace cuda_ipc\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_ipc/factory.h>\n\n#include <tensorpipe/channel/context_boilerplate.h>\n#include <tensorpipe/channel/cuda_ipc/channel_impl.h>\n#include <tensorpipe/channel/cuda_ipc/context_impl.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_ipc {\n\nstd::shared_ptr<Context> create() {\n  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>();\n}\n\n} // namespace cuda_ipc\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_ipc/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/channel/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_ipc {\n\nstd::shared_ptr<Context> create();\n\n} // namespace cuda_ipc\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/channel_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_xth/channel_impl.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <cuda_runtime.h>\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\n#include <tensorpipe/channel/cuda_xth/context_impl.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_xth {\n\nnamespace {\n\nstruct Descriptor {\n  uintptr_t startEvent;\n  uintptr_t srcPtr;\n  int srcDeviceIdx;\n  uintptr_t srcStream;\n  NOP_STRUCTURE(Descriptor, startEvent, srcPtr, srcDeviceIdx, srcStream);\n};\n\n} // namespace\n\nSendOperation::SendOperation(\n    int deviceIdx,\n    void* ptr,\n    size_t length,\n    cudaStream_t stream,\n    TSendCallback callback)\n    : deviceIdx(deviceIdx),\n      ptr(ptr),\n      length(length),\n      stream(stream),\n      callback(std::move(callback)),\n      startEv(deviceIdx) {\n  startEv.record(stream);\n}\n\nRecvOperation::RecvOperation(\n    int deviceIdx,\n    CudaBuffer buffer,\n    size_t length,\n    TRecvCallback callback)\n    : ptr(buffer.ptr),\n      length(length),\n      deviceIdx(deviceIdx),\n      stream(buffer.stream),\n      callback(std::move(callback)) {}\n\nvoid RecvOperation::process() {\n  {\n    CudaDeviceGuard guard(deviceIdx);\n    TP_CUDA_CHECK(cudaStreamWaitEvent(stream, startEvent, 0));\n    TP_CUDA_CHECK(\n        cudaMemcpyAsync(ptr, srcPtr, length, cudaMemcpyDeviceToDevice, stream));\n  }\n\n  CudaEvent stopEv(deviceIdx);\n  stopEv.record(stream);\n  stopEv.wait(srcStream, srcDeviceIdx);\n}\n\nChannelImpl::ChannelImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n 
   std::shared_ptr<transport::Connection> descriptorConnection,\n    std::shared_ptr<transport::Connection> completionConnection)\n    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      descriptorConnection_(std::move(descriptorConnection)),\n      completionConnection_(std::move(completionConnection)) {}\n\nvoid ChannelImpl::initImplFromLoop() {\n  context_->enroll(*this);\n}\n\nvoid ChannelImpl::sendImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  int deviceIdx = cudaDeviceForPointer(\n      context_->getCudaLib(), buffer.unwrap<CudaBuffer>().ptr);\n  SendOpIter opIter = sendOps_.emplaceBack(\n      sequenceNumber,\n      deviceIdx,\n      buffer.unwrap<CudaBuffer>().ptr,\n      length,\n      buffer.unwrap<CudaBuffer>().stream,\n      std::move(callback));\n\n  sendOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceSendOperation(\n    SendOpIter opIter,\n    SendOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the descriptor control connection and read calls on the\n  // completion control connection.\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::READING_COMPLETION,\n      /*cond=*/!error_ && prevOpState >= SendOperation::READING_COMPLETION,\n      /*actions=*/\n      {&ChannelImpl::writeDescriptor, &ChannelImpl::readCompletion});\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::READING_COMPLETION,\n      
/*to=*/SendOperation::FINISHED,\n      /*cond=*/op.doneReadingCompletion,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n}\n\nvoid ChannelImpl::writeDescriptor(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  auto nopHolder = std::make_shared<NopHolder<Descriptor>>();\n  Descriptor& nopDescriptor = nopHolder->getObject();\n  static_assert(std::is_pointer<cudaEvent_t>::value, \"\");\n  static_assert(std::is_pointer<cudaStream_t>::value, \"\");\n  nopDescriptor.startEvent = reinterpret_cast<uintptr_t>(op.startEv.raw());\n  nopDescriptor.srcDeviceIdx = op.deviceIdx;\n  nopDescriptor.srcPtr = reinterpret_cast<uintptr_t>(op.ptr);\n  nopDescriptor.srcStream = reinterpret_cast<uintptr_t>(op.stream);\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing descriptor (#\"\n             << op.sequenceNumber << \")\";\n  descriptorConnection_->write(\n      *nopHolder,\n      callbackWrapper_([sequenceNumber{op.sequenceNumber},\n                        nopHolder](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing descriptor (#\"\n                   << sequenceNumber << \")\";\n      }));\n}\n\nvoid ChannelImpl::readCompletion(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading completion (#\"\n             << op.sequenceNumber << \")\";\n  completionConnection_->read(\n      nullptr,\n      0,\n      callbackWrapper_([opIter](\n                           ChannelImpl& impl,\n                           const void* /* unused */,\n                           size_t /* unused */) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading completion (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingCompletion = true;\n        impl.sendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callSendCallback(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release 
the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::recvImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  int deviceIdx = cudaDeviceForPointer(\n      context_->getCudaLib(), buffer.unwrap<CudaBuffer>().ptr);\n  RecvOpIter opIter = recvOps_.emplaceBack(\n      sequenceNumber,\n      deviceIdx,\n      buffer.unwrap<CudaBuffer>(),\n      length,\n      std::move(callback));\n\n  recvOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceRecvOperation(\n    RecvOpIter opIter,\n    RecvOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of read calls on the descriptor control connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::READING_DESCRIPTOR,\n      /*cond=*/!error_ && prevOpState >= RecvOperation::READING_DESCRIPTOR,\n      /*actions=*/{&ChannelImpl::readDescriptor});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_DESCRIPTOR,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneReadingDescriptor,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the completion control connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_DESCRIPTOR,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/!error_ && op.doneReadingDescriptor &&\n          prevOpState >= RecvOperation::FINISHED,\n      /*actions=*/\n      
{&ChannelImpl::waitOnStartEventAndCopyAndSyncWithSourceStream,\n       &ChannelImpl::callRecvCallback,\n       &ChannelImpl::writeCompletion});\n}\n\nvoid ChannelImpl::readDescriptor(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading descriptor (#\"\n             << op.sequenceNumber << \")\";\n  auto nopHolderIn = std::make_shared<NopHolder<Descriptor>>();\n  descriptorConnection_->read(\n      *nopHolderIn, callbackWrapper_([opIter, nopHolderIn](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading descriptor (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingDescriptor = true;\n        if (!impl.error_) {\n          Descriptor& nopDescriptor = nopHolderIn->getObject();\n          static_assert(std::is_pointer<cudaEvent_t>::value, \"\");\n          static_assert(std::is_pointer<cudaStream_t>::value, \"\");\n          opIter->startEvent =\n              reinterpret_cast<cudaEvent_t>(nopDescriptor.startEvent);\n          opIter->srcPtr = reinterpret_cast<const void*>(nopDescriptor.srcPtr);\n          opIter->srcDeviceIdx = nopDescriptor.srcDeviceIdx;\n          opIter->srcStream =\n              reinterpret_cast<cudaStream_t>(nopDescriptor.srcStream);\n        }\n        impl.recvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::waitOnStartEventAndCopyAndSyncWithSourceStream(\n    RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is copying payload (#\"\n             << op.sequenceNumber << \")\";\n  op.process();\n  TP_VLOG(6) << \"Channel \" << id_ << \" done copying payload (#\"\n             << op.sequenceNumber << \")\";\n}\n\nvoid ChannelImpl::callRecvCallback(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid 
ChannelImpl::writeCompletion(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing completion (#\"\n             << op.sequenceNumber << \")\";\n  completionConnection_->write(\n      nullptr,\n      0,\n      callbackWrapper_([sequenceNumber{op.sequenceNumber}](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing completion (#\"\n                   << sequenceNumber << \")\";\n      }));\n}\n\nvoid ChannelImpl::handleErrorImpl() {\n  sendOps_.advanceAllOperations();\n  recvOps_.advanceAllOperations();\n\n  descriptorConnection_->close();\n  completionConnection_->close();\n\n  context_->unenroll(*this);\n}\n\n} // namespace cuda_xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/channel_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/common/state_machine.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_xth {\n\nclass ContextImpl;\n\nstruct SendOperation {\n  enum State { UNINITIALIZED, READING_COMPLETION, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingCompletion{false};\n\n  // Arguments at creation\n  int deviceIdx;\n  void* ptr;\n  size_t length;\n  cudaStream_t stream;\n  TSendCallback callback;\n\n  // Other stuff\n  CudaEvent startEv;\n\n  SendOperation(\n      int deviceIdx,\n      void* ptr,\n      size_t length,\n      cudaStream_t stream,\n      TSendCallback callback);\n};\n\nstruct RecvOperation {\n  enum State { UNINITIALIZED, READING_DESCRIPTOR, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingDescriptor{false};\n\n  // Arguments at creation\n  void* const ptr;\n  const size_t length;\n  const int deviceIdx;\n  const cudaStream_t stream;\n  TRecvCallback callback;\n\n  // Other data\n  cudaEvent_t startEvent;\n  const void* srcPtr;\n  int srcDeviceIdx;\n  cudaStream_t srcStream;\n\n  RecvOperation(\n      int deviceIdx,\n      CudaBuffer buffer,\n      size_t length,\n      TRecvCallback callback);\n\n  void process();\n};\n\nclass ChannelImpl final\n    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  ChannelImpl(\n      
ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::shared_ptr<transport::Connection> descriptorConnection,\n      std::shared_ptr<transport::Connection> completionConnection);\n\n protected:\n  // Implement the entry points called by ChannelImplBoilerplate.\n  void initImplFromLoop() override;\n  void sendImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) override;\n  void recvImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TRecvCallback callback) override;\n  void handleErrorImpl() override;\n\n private:\n  const std::shared_ptr<transport::Connection> descriptorConnection_;\n  const std::shared_ptr<transport::Connection> completionConnection_;\n\n  OpsStateMachine<ChannelImpl, SendOperation> sendOps_{\n      *this,\n      &ChannelImpl::advanceSendOperation};\n  using SendOpIter = decltype(sendOps_)::Iter;\n  OpsStateMachine<ChannelImpl, RecvOperation> recvOps_{\n      *this,\n      &ChannelImpl::advanceRecvOperation};\n  using RecvOpIter = decltype(recvOps_)::Iter;\n\n  // State machines for send and recv ops.\n  void advanceSendOperation(\n      SendOpIter opIter,\n      SendOperation::State prevOpState);\n  void advanceRecvOperation(\n      RecvOpIter opIter,\n      RecvOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For send operations:\n  void writeDescriptor(SendOpIter opIter);\n  void readCompletion(SendOpIter opIter);\n  void callSendCallback(SendOpIter opIter);\n  // For recv operations:\n  void readDescriptor(RecvOpIter opIter);\n  void waitOnStartEventAndCopyAndSyncWithSourceStream(RecvOpIter opIter);\n  void callRecvCallback(RecvOpIter opIter);\n  void writeCompletion(RecvOpIter opIter);\n};\n\n} // namespace cuda_xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_xth/context_impl.h>\n\n#include <unistd.h>\n\n#include <functional>\n#include <sstream>\n#include <string>\n#include <utility>\n\n#include <tensorpipe/channel/cuda_xth/channel_impl.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_xth {\n\nstd::shared_ptr<ContextImpl> ContextImpl::create() {\n  Error error;\n  CudaLib cudaLib;\n  std::tie(error, cudaLib) = CudaLib::create();\n  if (error) {\n    TP_VLOG(5)\n        << \"CUDA XTH channel is not viable because libcuda could not be loaded: \"\n        << error.what();\n    return nullptr;\n  }\n\n  std::ostringstream oss;\n  auto bootID = getBootID();\n  TP_THROW_ASSERT_IF(!bootID) << \"Unable to read boot_id\";\n  auto nsID = getLinuxNamespaceId(LinuxNamespace::kPid);\n  if (!nsID) {\n    TP_VLOG(5)\n        << \"CUDA XTH channel is not viable because it couldn't determine the PID namespace ID\";\n    return nullptr;\n  }\n  oss << bootID.value() << \"_\" << nsID.value() << \"_\" << ::getpid();\n  const std::string domainDescriptor = oss.str();\n\n  std::unordered_map<Device, std::string> deviceDescriptors;\n  for (const auto& device : getCudaDevices(cudaLib)) {\n    cudaDeviceProp props;\n    TP_CUDA_CHECK(cudaGetDeviceProperties(&props, device.index));\n\n    // Unified addressing is required for cross-device `cudaMemcpyAsync()`. 
We\n    // could lift this requirement by adding a fallback to\n    // `cudaMemcpyPeerAsync()`.\n    if (!props.unifiedAddressing) {\n      TP_VLOG(4) << \"CUDA XTH channel is not viable because CUDA device \"\n                 << device.index << \" does not have unified addressing\";\n      return nullptr;\n    }\n    deviceDescriptors[device] = domainDescriptor;\n  }\n\n  if (deviceDescriptors.empty()) {\n    return nullptr;\n  }\n\n  return std::make_shared<ContextImpl>(\n      std::move(cudaLib), std::move(deviceDescriptors));\n}\n\nContextImpl::ContextImpl(\n    CudaLib cudaLib,\n    std::unordered_map<Device, std::string> deviceDescriptors)\n    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(\n          std::move(deviceDescriptors)),\n      cudaLib_(std::move(cudaLib)) {}\n\nstd::shared_ptr<Channel> ContextImpl::createChannel(\n    std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint /* unused */) {\n  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());\n  return createChannelInternal(\n      std::move(connections[0]), std::move(connections[1]));\n}\n\nsize_t ContextImpl::numConnectionsNeeded() const {\n  return 2;\n}\n\nconst CudaLib& ContextImpl::getCudaLib() {\n  return cudaLib_;\n}\n\nvoid ContextImpl::handleErrorImpl() {}\n\nvoid ContextImpl::joinImpl() {}\n\nbool ContextImpl::inLoop() const {\n  return loop_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  loop_.deferToLoop(std::move(fn));\n};\n\n} // namespace cuda_xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n#include <tensorpipe/common/cuda_lib.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/device.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_xth {\n\nclass ChannelImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create();\n\n  ContextImpl(\n      CudaLib cudaLib,\n      std::unordered_map<Device, std::string> deviceDescriptors);\n\n  std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint);\n\n  size_t numConnectionsNeeded() const override;\n\n  const CudaLib& getCudaLib();\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n\n private:\n  OnDemandDeferredExecutor loop_;\n\n  const CudaLib cudaLib_;\n};\n\n} // namespace cuda_xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cuda_xth/factory.h>\n\n#include <tensorpipe/channel/context_boilerplate.h>\n#include <tensorpipe/channel/cuda_xth/channel_impl.h>\n#include <tensorpipe/channel/cuda_xth/context_impl.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_xth {\n\nstd::shared_ptr<Context> create() {\n  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>();\n}\n\n} // namespace cuda_xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/cuda_xth/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/channel/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace cuda_xth {\n\nstd::shared_ptr<Context> create();\n\n} // namespace cuda_xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/error.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/error.h>\n\n#include <cstring>\n#include <sstream>\n\nnamespace tensorpipe {\nnamespace channel {\n\nstd::string ContextClosedError::what() const {\n  return \"context closed\";\n}\n\nstd::string ChannelClosedError::what() const {\n  return \"channel closed\";\n}\n\nstd::string ContextNotViableError::what() const {\n  return \"context not viable\";\n}\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/error.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n\n#include <tensorpipe/common/error.h>\n\nnamespace tensorpipe {\nnamespace channel {\n\nclass ContextClosedError final : public BaseError {\n public:\n  ContextClosedError() {}\n\n  std::string what() const override;\n};\n\nclass ChannelClosedError final : public BaseError {\n public:\n  ChannelClosedError() {}\n\n  std::string what() const override;\n};\n\nclass ContextNotViableError final : public BaseError {\n public:\n  ContextNotViableError() {}\n\n  std::string what() const override;\n};\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/helpers.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/helpers.h>\n\n#include <string>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/nop.h>\n\nnamespace tensorpipe {\nnamespace channel {\n\nstd::string saveDescriptor(const AbstractNopHolder& object) {\n  const size_t len = object.getSize();\n  std::string out(len, '\\0');\n  NopWriter writer(\n      const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(out.data())), len);\n\n  nop::Status<void> status = object.write(writer);\n  TP_THROW_ASSERT_IF(status.has_error())\n      << \"Error saving descriptor: \" << status.GetErrorMessage();\n\n  return out;\n}\n\nvoid loadDescriptor(AbstractNopHolder& object, const std::string& in) {\n  const size_t len = in.size();\n  NopReader reader(reinterpret_cast<const uint8_t*>(in.data()), len);\n\n  nop::Status<void> status = object.read(reader);\n  TP_THROW_ASSERT_IF(status.has_error())\n      << \"Error loading descriptor: \" << status.GetErrorMessage();\n}\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/helpers.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n// Note: never include this file from headers!\n\n#include <string>\n\n#include <tensorpipe/common/nop.h>\n\nnamespace tensorpipe {\nnamespace channel {\n\nstd::string saveDescriptor(const AbstractNopHolder& object);\n\nvoid loadDescriptor(AbstractNopHolder& object, const std::string& in);\n\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/mpt/channel_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/mpt/channel_impl.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/channel/mpt/context_impl.h>\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace mpt {\n\nChannelImpl::ChannelImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::shared_ptr<transport::Connection> connection,\n    Endpoint endpoint,\n    uint64_t numLanes)\n    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      connection_(std::move(connection)),\n      endpoint_(endpoint),\n      numLanes_(numLanes),\n      lanes_(numLanes_) {}\n\nvoid ChannelImpl::initImplFromLoop() {\n  context_->enroll(*this);\n\n  TP_DCHECK_EQ(state_, UNINITIALIZED);\n  if (endpoint_ == Endpoint::kConnect) {\n    state_ = CLIENT_READING_HELLO;\n    auto nopHolderIn = std::make_shared<NopHolder<Packet>>();\n    TP_VLOG(6) << \"Channel \" << id_ << \" reading nop object (server hello)\";\n    connection_->read(\n        *nopHolderIn, callbackWrapper_([nopHolderIn](ChannelImpl& impl) {\n          TP_VLOG(6) << \"Channel \" << impl.id_\n                     << \" done reading nop object (server hello)\";\n          if (!impl.error_) {\n            impl.onClientReadHelloOnConnection(nopHolderIn->getObject());\n          }\n        }));\n  } else if (endpoint_ == Endpoint::kListen) {\n    state_ = SERVER_ACCEPTING_LANES;\n    const std::vector<std::string>& addresses = context_->addresses();\n    TP_DCHECK_EQ(addresses.size(), numLanes_);\n    
auto nopHolderOut = std::make_shared<NopHolder<Packet>>();\n    Packet& nopPacket = nopHolderOut->getObject();\n    nopPacket.Become(nopPacket.index_of<ServerHello>());\n    ServerHello& nopServerHello = *nopPacket.get<ServerHello>();\n    for (uint64_t laneIdx = 0; laneIdx < numLanes_; ++laneIdx) {\n      nopServerHello.laneAdvertisements.emplace_back();\n      LaneAdvertisement& nopLaneAdvertisement =\n          nopServerHello.laneAdvertisements.back();\n      nopLaneAdvertisement.address = addresses[laneIdx];\n      TP_VLOG(6) << \"Channel \" << id_ << \" requesting connection (for lane \"\n                 << laneIdx << \")\";\n      uint64_t token = context_->registerConnectionRequest(\n          laneIdx,\n          callbackWrapper_(\n              [laneIdx](\n                  ChannelImpl& impl,\n                  std::shared_ptr<transport::Connection> connection) {\n                TP_VLOG(6) << \"Channel \" << impl.id_\n                           << \" done requesting connection (for lane \"\n                           << laneIdx << \")\";\n                if (!impl.error_) {\n                  impl.onServerAcceptOfLane(laneIdx, std::move(connection));\n                }\n              }));\n      laneRegistrationIds_.emplace(laneIdx, token);\n      nopLaneAdvertisement.registrationId = token;\n      numLanesBeingAccepted_++;\n    }\n    TP_VLOG(6) << \"Channel \" << id_ << \" writing nop object (server hello)\";\n    connection_->write(\n        *nopHolderOut, callbackWrapper_([nopHolderOut](ChannelImpl& impl) {\n          TP_VLOG(6) << \"Channel \" << impl.id_\n                     << \" done writing nop object (server hello)\";\n        }));\n  } else {\n    TP_THROW_ASSERT() << \"unknown endpoint\";\n  }\n}\n\nvoid ChannelImpl::onClientReadHelloOnConnection(const Packet& nopPacketIn) {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, CLIENT_READING_HELLO);\n  TP_DCHECK_EQ(nopPacketIn.index(), nopPacketIn.index_of<ServerHello>());\n\n  const 
ServerHello& nopServerHello = *nopPacketIn.get<ServerHello>();\n  TP_DCHECK_EQ(nopServerHello.laneAdvertisements.size(), numLanes_);\n  lanes_.resize(numLanes_);\n  for (uint64_t laneIdx = 0; laneIdx < numLanes_; ++laneIdx) {\n    const LaneAdvertisement& nopLaneAdvertisement =\n        nopServerHello.laneAdvertisements[laneIdx];\n    std::shared_ptr<transport::Connection> lane =\n        context_->connect(laneIdx, nopLaneAdvertisement.address);\n    auto nopHolderOut = std::make_shared<NopHolder<Packet>>();\n    Packet& nopPacket = nopHolderOut->getObject();\n    nopPacket.Become(nopPacket.index_of<ClientHello>());\n    ClientHello& nopClientHello = *nopPacket.get<ClientHello>();\n    nopClientHello.registrationId = nopLaneAdvertisement.registrationId;\n    TP_VLOG(6) << \"Channel \" << id_\n               << \" writing nop object (client hello) on lane \" << laneIdx;\n    lane->write(\n        *nopHolderOut,\n        callbackWrapper_([laneIdx, nopHolderOut](ChannelImpl& impl) {\n          TP_VLOG(6) << \"Channel \" << impl.id_\n                     << \" done writing nop object (client hello) on lane \"\n                     << laneIdx;\n        }));\n    lanes_[laneIdx] = std::move(lane);\n  }\n\n  state_ = ESTABLISHED;\n  sendOps_.advanceAllOperations();\n  recvOps_.advanceAllOperations();\n}\n\nvoid ChannelImpl::onServerAcceptOfLane(\n    uint64_t laneIdx,\n    std::shared_ptr<transport::Connection> connection) {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, SERVER_ACCEPTING_LANES);\n\n  TP_DCHECK_LT(laneIdx, lanes_.size());\n  TP_DCHECK(!lanes_[laneIdx]);\n  lanes_[laneIdx] = std::move(connection);\n  auto laneRegistrationIter = laneRegistrationIds_.find(laneIdx);\n  TP_DCHECK(laneRegistrationIter != laneRegistrationIds_.end());\n  context_->unregisterConnectionRequest(laneRegistrationIter->second);\n  laneRegistrationIds_.erase(laneRegistrationIter);\n  numLanesBeingAccepted_--;\n\n  if (numLanesBeingAccepted_ == 0) {\n    state_ = ESTABLISHED;\n 
   sendOps_.advanceAllOperations();\n    recvOps_.advanceAllOperations();\n  }\n}\n\nvoid ChannelImpl::sendImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  SendOpIter opIter = sendOps_.emplaceBack(sequenceNumber);\n  SendOperation& op = *opIter;\n  op.ptr = buffer.unwrap<CpuBuffer>().ptr;\n  op.length = length;\n  op.callback = std::move(callback);\n\n  sendOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceSendOperation(\n    SendOpIter opIter,\n    SendOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on lanes.\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::WRITING_CHUNKS,\n      /*cond=*/!error_ && state_ == ESTABLISHED &&\n          prevOpState >= SendOperation::WRITING_CHUNKS,\n      /*actions=*/{&ChannelImpl::writeChunks});\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::WRITING_CHUNKS,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/op.numChunksBeingWritten == 0,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n}\n\nvoid ChannelImpl::writeChunks(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  for (uint64_t laneIdx = 0; laneIdx < lanes_.size(); laneIdx++) {\n    // Insert \"cutpoints\" at equally-spaced intervals in the buffer, rounding\n    // them down if they don't end up being at an integer position.\n    uint64_t offsetStart = op.length * laneIdx / lanes_.size();\n    uint64_t offsetEnd = op.length * (laneIdx + 1) / lanes_.size();\n    // As void \"has no size\" we cannot 
do pointer arithmetic on it. We need to\n    // temporarily convert the pointer to a type that has a size of 1 byte.\n    const void* ptr = reinterpret_cast<const uint8_t*>(op.ptr) + offsetStart;\n    uint64_t length = offsetEnd - offsetStart;\n\n    // Write payload.\n    TP_VLOG(6) << \"Channel \" << id_ << \" writing payload #\" << op.sequenceNumber\n               << \" on lane \" << laneIdx;\n    lanes_[laneIdx]->write(\n        ptr, length, callbackWrapper_([opIter, laneIdx](ChannelImpl& impl) {\n          TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing payload #\"\n                     << opIter->sequenceNumber << \" on lane \" << laneIdx;\n          --opIter->numChunksBeingWritten;\n          impl.sendOps_.advanceOperation(opIter);\n        }));\n    ++op.numChunksBeingWritten;\n  }\n}\n\nvoid ChannelImpl::callSendCallback(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::recvImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  RecvOpIter opIter = recvOps_.emplaceBack(sequenceNumber);\n  RecvOperation& op = *opIter;\n  op.ptr = buffer.unwrap<CpuBuffer>().ptr;\n  op.length = length;\n  op.callback = std::move(callback);\n\n  recvOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceRecvOperation(\n    RecvOpIter opIter,\n    RecvOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of read calls on lanes.\n  recvOps_.attemptTransition(\n      opIter,\n      
/*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::READING_CHUNKS,\n      /*cond=*/!error_ && state_ == ESTABLISHED &&\n          prevOpState >= RecvOperation::READING_CHUNKS,\n      /*actions=*/{&ChannelImpl::readChunks});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_CHUNKS,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/op.numChunksBeingRead == 0,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n}\n\nvoid ChannelImpl::readChunks(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  for (uint64_t laneIdx = 0; laneIdx < lanes_.size(); laneIdx++) {\n    // Insert \"cutpoints\" at equally-spaced intervals in the buffer, rounding\n    // them down if they don't end up being at an integer position.\n    uint64_t offsetStart = op.length * laneIdx / lanes_.size();\n    uint64_t offsetEnd = op.length * (laneIdx + 1) / lanes_.size();\n    // As void \"has no size\" we cannot do pointer arithmetic on it. We need to\n    // temporarily convert the pointer to a type that has a size of 1 byte.\n    void* ptr = reinterpret_cast<uint8_t*>(op.ptr) + offsetStart;\n    uint64_t length = offsetEnd - offsetStart;\n\n    // Read payload.\n    TP_VLOG(6) << \"Channel \" << id_ << \" reading payload #\" << op.sequenceNumber\n               << \" on lane \" << laneIdx;\n    lanes_[laneIdx]->read(\n        ptr,\n        length,\n        callbackWrapper_([opIter, laneIdx](\n                             ChannelImpl& impl,\n                             const void* /* unused */,\n                             size_t /* unused */) {\n          TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading payload #\"\n                     << opIter->sequenceNumber << \" on lane \" << laneIdx;\n          --opIter->numChunksBeingRead;\n          impl.recvOps_.advanceOperation(opIter);\n        }));\n    ++op.numChunksBeingRead;\n  }\n}\n\nvoid ChannelImpl::callRecvCallback(RecvOpIter opIter) {\n  RecvOperation& op = 
*opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::handleErrorImpl() {\n  sendOps_.advanceAllOperations();\n  recvOps_.advanceAllOperations();\n\n  // Close the connections so that all current operations will be aborted. This\n  // will cause their callbacks to be invoked, and only then we'll invoke ours.\n  connection_->close();\n  for (auto& lane : lanes_) {\n    if (lane) {\n      lane->close();\n    }\n  }\n\n  for (const auto& iter : laneRegistrationIds_) {\n    context_->unregisterConnectionRequest(iter.second);\n  }\n\n  context_->unenroll(*this);\n}\n\n// TODO Implement setIdImpl to propagate the ID to the connections\n\n} // namespace mpt\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/mpt/channel_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <deque>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <vector>\n\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n#include <tensorpipe/channel/mpt/nop_types.h>\n#include <tensorpipe/common/state_machine.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace mpt {\n\nclass ContextImpl;\n\n// State capturing a single send operation.\nstruct SendOperation {\n  enum State { UNINITIALIZED, WRITING_CHUNKS, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  int64_t numChunksBeingWritten{0};\n\n  // Arguments at creation\n  const void* ptr;\n  size_t length;\n  TSendCallback callback;\n};\n\n// State capturing a single recv operation.\nstruct RecvOperation {\n  enum State { UNINITIALIZED, READING_CHUNKS, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  int64_t numChunksBeingRead{0};\n\n  // Arguments at creation\n  void* ptr;\n  size_t length;\n  TRecvCallback callback;\n};\n\nclass ChannelImpl final\n    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  ChannelImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::shared_ptr<transport::Connection> connection,\n      Endpoint endpoint,\n      uint64_t numLanes);\n\n protected:\n  // Implement the entry points called by ChannelImplBoilerplate.\n  void initImplFromLoop() override;\n  void sendImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) 
override;\n  void recvImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TRecvCallback callback) override;\n  void handleErrorImpl() override;\n\n private:\n  enum State {\n    UNINITIALIZED,\n    CLIENT_READING_HELLO,\n    SERVER_ACCEPTING_LANES,\n    ESTABLISHED,\n  };\n\n  // Called when client reads the server's hello on backbone connection\n  void onClientReadHelloOnConnection(const Packet& nopPacketIn);\n\n  // Called when server accepts new client connection for lane\n  void onServerAcceptOfLane(\n      uint64_t laneIdx,\n      std::shared_ptr<transport::Connection> connection);\n\n  const std::shared_ptr<transport::Connection> connection_;\n  const Endpoint endpoint_;\n  State state_{UNINITIALIZED};\n  const uint64_t numLanes_;\n  uint64_t numLanesBeingAccepted_{0};\n  std::vector<std::shared_ptr<transport::Connection>> lanes_;\n  std::unordered_map<uint64_t, uint64_t> laneRegistrationIds_;\n\n  OpsStateMachine<ChannelImpl, SendOperation> sendOps_{\n      *this,\n      &ChannelImpl::advanceSendOperation};\n  using SendOpIter = decltype(sendOps_)::Iter;\n  OpsStateMachine<ChannelImpl, RecvOperation> recvOps_{\n      *this,\n      &ChannelImpl::advanceRecvOperation};\n  using RecvOpIter = decltype(recvOps_)::Iter;\n\n  // State machines for send and recv ops.\n  void advanceSendOperation(\n      SendOpIter opIter,\n      SendOperation::State prevOpState);\n  void advanceRecvOperation(\n      RecvOpIter opIter,\n      RecvOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For send operations:\n  void writeChunks(SendOpIter opIter);\n  void callSendCallback(SendOpIter opIter);\n  // For recv operations:\n  void readChunks(RecvOpIter opIter);\n  void callRecvCallback(RecvOpIter opIter);\n};\n\n} // namespace mpt\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/mpt/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/mpt/context_impl.h>\n\n#include <memory>\n#include <sstream>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/channel/error.h>\n#include <tensorpipe/channel/mpt/channel_impl.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/transport/connection.h>\n#include <tensorpipe/transport/context.h>\n#include <tensorpipe/transport/listener.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace mpt {\n\nnamespace {\n\nstd::string generateDomainDescriptor(\n    const std::vector<std::shared_ptr<transport::Context>>& contexts) {\n  // FIXME Escape the contexts' domain descriptors in case they contain a colon?\n  // Or put them all in a nop object, that'll do the escaping for us.\n  // But is it okay to compare nop objects by equality bitwise?\n  std::ostringstream ss;\n  ss << contexts.size();\n  for (const auto& context : contexts) {\n    ss << \":\" << context->domainDescriptor();\n  }\n  return ss.str();\n}\n\n} // namespace\n\nstd::shared_ptr<ContextImpl> ContextImpl::create(\n    std::vector<std::shared_ptr<transport::Context>> contexts,\n    std::vector<std::shared_ptr<transport::Listener>> listeners) {\n  for (const auto& context : contexts) {\n    if (!context->isViable()) {\n      return nullptr;\n    }\n  }\n\n  std::unordered_map<Device, std::string> deviceDescriptors = {\n      {Device{kCpuDeviceType, 0}, generateDomainDescriptor(contexts)}};\n\n  return std::make_shared<ContextImpl>(\n      std::move(contexts), std::move(listeners), std::move(deviceDescriptors));\n}\n\nContextImpl::ContextImpl(\n    std::vector<std::shared_ptr<transport::Context>> contexts,\n    std::vector<std::shared_ptr<transport::Listener>> 
listeners,\n    std::unordered_map<Device, std::string> deviceDescriptors)\n    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(\n          std::move(deviceDescriptors)),\n      contexts_(std::move(contexts)),\n      listeners_(std::move(listeners)) {\n  TP_THROW_ASSERT_IF(contexts_.size() != listeners_.size());\n  numLanes_ = contexts_.size();\n\n  addresses_.reserve(numLanes_);\n  for (const auto& listener : listeners_) {\n    addresses_.emplace_back(listener->addr());\n  }\n}\n\nvoid ContextImpl::initImplFromLoop() {\n  for (uint64_t laneIdx = 0; laneIdx < numLanes_; ++laneIdx) {\n    acceptLane(laneIdx);\n  }\n}\n\nstd::shared_ptr<Channel> ContextImpl::createChannel(\n    std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint endpoint) {\n  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());\n  return createChannelInternal(std::move(connections[0]), endpoint, numLanes_);\n}\n\nconst std::vector<std::string>& ContextImpl::addresses() const {\n  // As this is an immutable member (after it has been initialized in\n  // the constructor), we'll access it without deferring to the loop.\n  return addresses_;\n}\n\nuint64_t ContextImpl::registerConnectionRequest(\n    uint64_t laneIdx,\n    connection_request_callback_fn fn) {\n  TP_DCHECK(loop_.inLoop());\n\n  uint64_t registrationId = nextConnectionRequestRegistrationId_++;\n\n  TP_VLOG(4) << \"Channel context \" << id_\n             << \" received a connection request registration (#\"\n             << registrationId << \") on lane \" << laneIdx;\n\n  fn = [this, registrationId, fn{std::move(fn)}](\n           const Error& error,\n           std::shared_ptr<transport::Connection> connection) {\n    TP_VLOG(4) << \"Channel context \" << id_\n               << \" calling a connection request registration callback (#\"\n               << registrationId << \")\";\n    fn(error, std::move(connection));\n    TP_VLOG(4) << \"Channel context \" << id_\n               << \" done calling a 
connection request registration callback (#\"\n               << registrationId << \")\";\n  };\n\n  if (error_) {\n    fn(error_, std::shared_ptr<transport::Connection>());\n  } else {\n    connectionRequestRegistrations_.emplace(registrationId, std::move(fn));\n  }\n\n  return registrationId;\n}\n\nvoid ContextImpl::unregisterConnectionRequest(uint64_t registrationId) {\n  TP_DCHECK(loop_.inLoop());\n\n  TP_VLOG(4) << \"Channel context \" << id_\n             << \" received a connection request de-registration (#\"\n             << registrationId << \")\";\n\n  connectionRequestRegistrations_.erase(registrationId);\n}\n\nstd::shared_ptr<transport::Connection> ContextImpl::connect(\n    uint64_t laneIdx,\n    std::string address) {\n  TP_VLOG(4) << \"Channel context \" << id_ << \" opening connection on lane \"\n             << laneIdx;\n  return contexts_[laneIdx]->connect(std::move(address));\n}\n\nvoid ContextImpl::acceptLane(uint64_t laneIdx) {\n  TP_DCHECK(loop_.inLoop());\n\n  TP_VLOG(6) << \"Channel context \" << id_ << \" accepting connection on lane \"\n             << laneIdx;\n  listeners_[laneIdx]->accept(\n      callbackWrapper_([laneIdx](\n                           ContextImpl& impl,\n                           std::shared_ptr<transport::Connection> connection) {\n        TP_VLOG(6) << \"Channel context \" << impl.id_\n                   << \" done accepting connection on lane \" << laneIdx;\n        if (impl.error_) {\n          return;\n        }\n        impl.onAcceptOfLane(std::move(connection));\n        impl.acceptLane(laneIdx);\n      }));\n}\n\nvoid ContextImpl::onAcceptOfLane(\n    std::shared_ptr<transport::Connection> connection) {\n  TP_DCHECK(loop_.inLoop());\n\n  // Keep it alive until we figure out what to do with it.\n  connectionsWaitingForHello_.insert(connection);\n  auto npHolderIn = std::make_shared<NopHolder<Packet>>();\n  TP_VLOG(6) << \"Channel context \" << id_\n             << \" reading nop object (client hello)\";\n  
connection->read(\n      *npHolderIn,\n      callbackWrapper_([npHolderIn, connection](ContextImpl& impl) mutable {\n        TP_VLOG(6) << \"Channel context \" << impl.id_\n                   << \" done reading nop object (client hello)\";\n        if (impl.error_) {\n          return;\n        }\n        impl.connectionsWaitingForHello_.erase(connection);\n        impl.onReadClientHelloOnLane(\n            std::move(connection), npHolderIn->getObject());\n      }));\n}\n\nvoid ContextImpl::onReadClientHelloOnLane(\n    std::shared_ptr<transport::Connection> connection,\n    const Packet& nopPacketIn) {\n  TP_DCHECK(loop_.inLoop());\n  TP_DCHECK_EQ(nopPacketIn.index(), nopPacketIn.index_of<ClientHello>());\n\n  const ClientHello& nopClientHello = *nopPacketIn.get<ClientHello>();\n  uint64_t registrationId = nopClientHello.registrationId;\n  auto iter = connectionRequestRegistrations_.find(registrationId);\n  // The connection request may have already been deregistered, for example\n  // because the channel may have been closed.\n  if (iter != connectionRequestRegistrations_.end()) {\n    auto fn = std::move(iter->second);\n    connectionRequestRegistrations_.erase(iter);\n    fn(Error::kSuccess, std::move(connection));\n  }\n}\n\nvoid ContextImpl::handleErrorImpl() {\n  for (auto& iter : connectionRequestRegistrations_) {\n    connection_request_callback_fn fn = std::move(iter.second);\n    fn(error_, std::shared_ptr<transport::Connection>());\n  }\n  connectionRequestRegistrations_.clear();\n\n  for (const auto& connection : connectionsWaitingForHello_) {\n    connection->close();\n  }\n  connectionsWaitingForHello_.clear();\n\n  for (auto& listener : listeners_) {\n    listener->close();\n  }\n  for (auto& context : contexts_) {\n    context->close();\n  }\n}\n\nvoid ContextImpl::setIdImpl() {\n  for (uint64_t laneIdx = 0; laneIdx < numLanes_; ++laneIdx) {\n    contexts_[laneIdx]->setId(id_ + \".ctx_\" + std::to_string(laneIdx));\n    
listeners_[laneIdx]->setId(\n        id_ + \".ctx_\" + std::to_string(laneIdx) + \".l_\" +\n        std::to_string(laneIdx));\n  }\n}\n\nvoid ContextImpl::joinImpl() {\n  for (auto& context : contexts_) {\n    context->join();\n  }\n}\n\nbool ContextImpl::inLoop() const {\n  return loop_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  loop_.deferToLoop(std::move(fn));\n};\n\n} // namespace mpt\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/mpt/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <functional>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <unordered_set>\n#include <vector>\n\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n#include <tensorpipe/channel/mpt/nop_types.h>\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/device.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace mpt {\n\nclass ChannelImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create(\n      std::vector<std::shared_ptr<transport::Context>> contexts,\n      std::vector<std::shared_ptr<transport::Listener>> listeners);\n\n  ContextImpl(\n      std::vector<std::shared_ptr<transport::Context>> contexts,\n      std::vector<std::shared_ptr<transport::Listener>> listeners,\n      std::unordered_map<Device, std::string> deviceDescriptors);\n\n  std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint);\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n  using connection_request_callback_fn =\n      std::function<void(const Error&, std::shared_ptr<transport::Connection>)>;\n\n  const std::vector<std::string>& addresses() const;\n\n  uint64_t registerConnectionRequest(\n      uint64_t laneIdx,\n      connection_request_callback_fn fn);\n\n  void unregisterConnectionRequest(uint64_t registrationId);\n\n  std::shared_ptr<transport::Connection> connect(\n      uint64_t laneIdx,\n   
   std::string address);\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void initImplFromLoop() override;\n  void handleErrorImpl() override;\n  void joinImpl() override;\n  void setIdImpl() override;\n\n private:\n  OnDemandDeferredExecutor loop_;\n\n  void acceptLane(uint64_t laneIdx);\n  void onAcceptOfLane(std::shared_ptr<transport::Connection> connection);\n  void onReadClientHelloOnLane(\n      std::shared_ptr<transport::Connection> connection,\n      const Packet& nopPacketIn);\n\n  const std::vector<std::shared_ptr<transport::Context>> contexts_;\n  const std::vector<std::shared_ptr<transport::Listener>> listeners_;\n\n  uint64_t numLanes_{0};\n  std::vector<std::string> addresses_;\n\n  uint64_t nextConnectionRequestRegistrationId_{0};\n\n  // Needed to keep them alive.\n  std::unordered_set<std::shared_ptr<transport::Connection>>\n      connectionsWaitingForHello_;\n\n  std::unordered_map<uint64_t, connection_request_callback_fn>\n      connectionRequestRegistrations_;\n};\n\n} // namespace mpt\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/mpt/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/mpt/factory.h>\n\n#include <tensorpipe/channel/context_boilerplate.h>\n#include <tensorpipe/channel/mpt/channel_impl.h>\n#include <tensorpipe/channel/mpt/context_impl.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace mpt {\n\nstd::shared_ptr<Context> create(\n    std::vector<std::shared_ptr<transport::Context>> contexts,\n    std::vector<std::shared_ptr<transport::Listener>> listeners) {\n  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>(\n      std::move(contexts), std::move(listeners));\n}\n\n} // namespace mpt\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/mpt/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <vector>\n\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace mpt {\n\nstd::shared_ptr<Context> create(\n    std::vector<std::shared_ptr<transport::Context>> contexts,\n    std::vector<std::shared_ptr<transport::Listener>> listeners);\n\n} // namespace mpt\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/mpt/nop_types.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n#include <vector>\n\n#include <nop/serializer.h>\n#include <nop/structure.h>\n#include <nop/types/variant.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace mpt {\n\nstruct LaneAdvertisement {\n  // This pointless constructor is needed to work around a bug in GCC 5.5 (and\n  // possibly other versions). It appears to be needed in the nop types that are\n  // used inside std::vectors.\n  LaneAdvertisement() {}\n\n  std::string address;\n  uint64_t registrationId;\n  NOP_STRUCTURE(LaneAdvertisement, address, registrationId);\n};\n\nstruct ServerHello {\n  std::vector<LaneAdvertisement> laneAdvertisements;\n  NOP_STRUCTURE(ServerHello, laneAdvertisements);\n};\n\nstruct ClientHello {\n  uint64_t registrationId;\n  NOP_STRUCTURE(ClientHello, registrationId);\n};\n\nusing Packet = nop::Variant<ServerHello, ClientHello>;\n\n} // namespace mpt\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/xth/channel_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/xth/channel_impl.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\n#include <tensorpipe/channel/xth/context_impl.h>\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace xth {\n\nnamespace {\n\nstruct Descriptor {\n  uint64_t ptr;\n  NOP_STRUCTURE(Descriptor, ptr);\n};\n\n} // namespace\n\nChannelImpl::ChannelImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::shared_ptr<transport::Connection> descriptorConnection,\n    std::shared_ptr<transport::Connection> completionConnection)\n    : ChannelImplBoilerplate<ContextImpl, ChannelImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      descriptorConnection_(std::move(descriptorConnection)),\n      completionConnection_(std::move(completionConnection)) {}\n\nvoid ChannelImpl::initImplFromLoop() {\n  context_->enroll(*this);\n}\n\nvoid ChannelImpl::sendImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TSendCallback callback) {\n  SendOpIter opIter = sendOps_.emplaceBack(sequenceNumber);\n  SendOperation& op = *opIter;\n  op.ptr = buffer.unwrap<CpuBuffer>().ptr;\n  op.length = length;\n  op.callback = std::move(callback);\n\n  sendOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceSendOperation(\n    SendOpIter opIter,\n    SendOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  SendOperation& op = *opIter;\n\n  sendOps_.attemptTransition(\n      opIter,\n 
     /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the descriptor control connection and read calls on the\n  // completion control connection.\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::UNINITIALIZED,\n      /*to=*/SendOperation::READING_COMPLETION,\n      /*cond=*/!error_ && prevOpState >= SendOperation::READING_COMPLETION,\n      /*actions=*/\n      {&ChannelImpl::writeDescriptor, &ChannelImpl::readCompletion});\n\n  sendOps_.attemptTransition(\n      opIter,\n      /*from=*/SendOperation::READING_COMPLETION,\n      /*to=*/SendOperation::FINISHED,\n      /*cond=*/op.doneReadingCompletion,\n      /*actions=*/{&ChannelImpl::callSendCallback});\n}\n\nvoid ChannelImpl::writeDescriptor(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  auto nopHolder = std::make_shared<NopHolder<Descriptor>>();\n  Descriptor& nopDescriptor = nopHolder->getObject();\n  nopDescriptor.ptr = reinterpret_cast<std::uintptr_t>(op.ptr);\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing descriptor (#\"\n             << op.sequenceNumber << \")\";\n  descriptorConnection_->write(\n      *nopHolder,\n      callbackWrapper_([sequenceNumber{op.sequenceNumber},\n                        nopHolder](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing descriptor (#\"\n                   << sequenceNumber << \")\";\n      }));\n}\n\nvoid ChannelImpl::readCompletion(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading completion (#\"\n             << op.sequenceNumber << \")\";\n  completionConnection_->read(\n      nullptr,\n      0,\n      callbackWrapper_([opIter](\n                           ChannelImpl& impl,\n                     
      const void* /* unused */,\n                           size_t /* unused */) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading completion (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingCompletion = true;\n        impl.sendOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callSendCallback(SendOpIter opIter) {\n  SendOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::recvImplFromLoop(\n    uint64_t sequenceNumber,\n    Buffer buffer,\n    size_t length,\n    TRecvCallback callback) {\n  RecvOpIter opIter = recvOps_.emplaceBack(sequenceNumber);\n  RecvOperation& op = *opIter;\n  op.ptr = buffer.unwrap<CpuBuffer>().ptr;\n  op.length = length;\n  op.callback = std::move(callback);\n\n  recvOps_.advanceOperation(opIter);\n}\n\nvoid ChannelImpl::advanceRecvOperation(\n    RecvOpIter opIter,\n    RecvOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  RecvOperation& op = *opIter;\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ || op.length == 0,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of read calls on the descriptor control connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::UNINITIALIZED,\n      /*to=*/RecvOperation::READING_DESCRIPTOR,\n      /*cond=*/!error_ && prevOpState >= RecvOperation::READING_DESCRIPTOR,\n      /*actions=*/{&ChannelImpl::readDescriptor});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_DESCRIPTOR,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneReadingDescriptor,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  
recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::READING_DESCRIPTOR,\n      /*to=*/RecvOperation::COPYING,\n      /*cond=*/!error_ && op.doneReadingDescriptor,\n      /*actions=*/{&ChannelImpl::copy});\n\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::COPYING,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/error_ && op.doneCopying,\n      /*actions=*/{&ChannelImpl::callRecvCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the completion control connection.\n  recvOps_.attemptTransition(\n      opIter,\n      /*from=*/RecvOperation::COPYING,\n      /*to=*/RecvOperation::FINISHED,\n      /*cond=*/!error_ && op.doneCopying &&\n          prevOpState >= RecvOperation::FINISHED,\n      /*actions=*/\n      {&ChannelImpl::callRecvCallback, &ChannelImpl::writeCompletion});\n}\n\nvoid ChannelImpl::readDescriptor(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is reading descriptor (#\"\n             << op.sequenceNumber << \")\";\n  auto nopHolderIn = std::make_shared<NopHolder<Descriptor>>();\n  descriptorConnection_->read(\n      *nopHolderIn, callbackWrapper_([opIter, nopHolderIn](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done reading descriptor (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingDescriptor = true;\n        if (!impl.error_) {\n          Descriptor& nopDescriptor = nopHolderIn->getObject();\n          opIter->remotePtr = reinterpret_cast<void*>(nopDescriptor.ptr);\n        }\n        impl.recvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::copy(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is copying payload (#\"\n             << op.sequenceNumber << \")\";\n  context_->requestCopy(\n      op.remotePtr,\n      op.ptr,\n      op.length,\n  
    callbackWrapper_([opIter](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done copying payload (#\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneCopying = true;\n        impl.recvOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid ChannelImpl::callRecvCallback(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  op.callback(error_);\n  // Reset callback to release the resources it was holding.\n  op.callback = nullptr;\n}\n\nvoid ChannelImpl::writeCompletion(RecvOpIter opIter) {\n  RecvOperation& op = *opIter;\n\n  TP_VLOG(6) << \"Channel \" << id_ << \" is writing completion (#\"\n             << op.sequenceNumber << \")\";\n  completionConnection_->write(\n      nullptr,\n      0,\n      callbackWrapper_([sequenceNumber{op.sequenceNumber}](ChannelImpl& impl) {\n        TP_VLOG(6) << \"Channel \" << impl.id_ << \" done writing completion (#\"\n                   << sequenceNumber << \")\";\n      }));\n}\n\nvoid ChannelImpl::handleErrorImpl() {\n  sendOps_.advanceAllOperations();\n  recvOps_.advanceAllOperations();\n\n  descriptorConnection_->close();\n  completionConnection_->close();\n\n  context_->unenroll(*this);\n}\n\n} // namespace xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/xth/channel_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n\n#include <tensorpipe/channel/channel_impl_boilerplate.h>\n#include <tensorpipe/common/state_machine.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace xth {\n\nclass ContextImpl;\n\nstruct SendOperation {\n  enum State { UNINITIALIZED, READING_COMPLETION, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingCompletion{false};\n\n  // Arguments at creation\n  void* ptr;\n  size_t length;\n  TSendCallback callback;\n};\n\nstruct RecvOperation {\n  enum State { UNINITIALIZED, READING_DESCRIPTOR, COPYING, FINISHED };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingDescriptor{false};\n  bool doneCopying{false};\n\n  // Arguments at creation\n  void* ptr;\n  size_t length;\n  TRecvCallback callback;\n\n  // Other data\n  void* remotePtr;\n};\n\nclass ChannelImpl final\n    : public ChannelImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  ChannelImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::shared_ptr<transport::Connection> descriptorConnection,\n      std::shared_ptr<transport::Connection> completionConnection);\n\n protected:\n  // Implement the entry points called by ChannelImplBoilerplate.\n  void initImplFromLoop() override;\n  void sendImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n      size_t length,\n      TSendCallback callback) override;\n  void recvImplFromLoop(\n      uint64_t sequenceNumber,\n      Buffer buffer,\n  
    size_t length,\n      TRecvCallback callback) override;\n  void handleErrorImpl() override;\n\n private:\n  const std::shared_ptr<transport::Connection> descriptorConnection_;\n  const std::shared_ptr<transport::Connection> completionConnection_;\n\n  OpsStateMachine<ChannelImpl, SendOperation> sendOps_{\n      *this,\n      &ChannelImpl::advanceSendOperation};\n  using SendOpIter = decltype(sendOps_)::Iter;\n  OpsStateMachine<ChannelImpl, RecvOperation> recvOps_{\n      *this,\n      &ChannelImpl::advanceRecvOperation};\n  using RecvOpIter = decltype(recvOps_)::Iter;\n\n  // State machines for send and recv ops.\n  void advanceSendOperation(\n      SendOpIter opIter,\n      SendOperation::State prevOpState);\n  void advanceRecvOperation(\n      RecvOpIter opIter,\n      RecvOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For send operations:\n  void writeDescriptor(SendOpIter opIter);\n  void readCompletion(SendOpIter opIter);\n  void callSendCallback(SendOpIter opIter);\n  // For recv operations:\n  void readDescriptor(RecvOpIter opIter);\n  void copy(RecvOpIter opIter);\n  void callRecvCallback(RecvOpIter opIter);\n  void writeCompletion(RecvOpIter opIter);\n};\n\n} // namespace xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/xth/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/xth/context_impl.h>\n\n#include <unistd.h>\n\n#include <cstring>\n#include <functional>\n#include <limits>\n#include <sstream>\n#include <string>\n#include <thread>\n#include <utility>\n\n#include <tensorpipe/channel/xth/channel_impl.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace xth {\n\nstd::shared_ptr<ContextImpl> ContextImpl::create() {\n  std::ostringstream oss;\n  auto bootID = getBootID();\n  TP_THROW_ASSERT_IF(!bootID) << \"Unable to read boot_id\";\n  auto nsID = getLinuxNamespaceId(LinuxNamespace::kPid);\n  if (!nsID.has_value()) {\n    TP_VLOG(5)\n        << \"XTH channel is not viable because it couldn't determine the PID namespace ID\";\n    return nullptr;\n  }\n  oss << bootID.value() << \"_\" << nsID.value() << \"_\" << ::getpid();\n  const std::string domainDescriptor = oss.str();\n\n  std::unordered_map<Device, std::string> deviceDescriptors = {\n      {Device{kCpuDeviceType, 0}, domainDescriptor}};\n  return std::make_shared<ContextImpl>(std::move(deviceDescriptors));\n}\n\nContextImpl::ContextImpl(\n    std::unordered_map<Device, std::string> deviceDescriptors)\n    : ContextImplBoilerplate<ContextImpl, ChannelImpl>(\n          std::move(deviceDescriptors)),\n      requests_(std::numeric_limits<int>::max()) {\n  thread_ = std::thread(&ContextImpl::handleCopyRequests, this);\n}\n\nstd::shared_ptr<Channel> ContextImpl::createChannel(\n    std::vector<std::shared_ptr<transport::Connection>> connections,\n    Endpoint /* unused */) {\n  TP_DCHECK_EQ(numConnectionsNeeded(), connections.size());\n  return createChannelInternal(\n      std::move(connections[0]), 
std::move(connections[1]));\n}\n\nsize_t ContextImpl::numConnectionsNeeded() const {\n  return 2;\n}\n\nvoid ContextImpl::handleErrorImpl() {\n  requests_.push(nullopt);\n}\n\nvoid ContextImpl::joinImpl() {\n  thread_.join();\n  // TP_DCHECK(requests_.empty());\n}\n\nbool ContextImpl::inLoop() const {\n  return loop_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  loop_.deferToLoop(std::move(fn));\n};\n\nvoid ContextImpl::requestCopy(\n    void* remotePtr,\n    void* localPtr,\n    size_t length,\n    std::function<void(const Error&)> fn) {\n  uint64_t requestId = nextRequestId_++;\n  TP_VLOG(4) << \"Channel context \" << id_ << \" received a copy request (#\"\n             << requestId << \")\";\n\n  fn = [this, requestId, fn{std::move(fn)}](const Error& error) {\n    TP_VLOG(4) << \"Channel context \" << id_\n               << \" is calling a copy request callback (#\" << requestId << \")\";\n    fn(error);\n    TP_VLOG(4) << \"Channel context \" << id_\n               << \" done calling a copy request callback (#\" << requestId\n               << \")\";\n  };\n\n  requests_.push(CopyRequest{remotePtr, localPtr, length, std::move(fn)});\n}\n\nvoid ContextImpl::handleCopyRequests() {\n  setThreadName(\"TP_XTH_loop\");\n  while (true) {\n    auto maybeRequest = requests_.pop();\n    if (!maybeRequest.has_value()) {\n      break;\n    }\n    CopyRequest request = std::move(maybeRequest).value();\n\n    // Don't even call memcpy on a length of 0 to avoid issues with the pointer\n    // possibly being null.\n    if (request.length > 0) {\n      // Perform copy.\n      std::memcpy(request.localPtr, request.remotePtr, request.length);\n    }\n\n    request.callback(Error::kSuccess);\n  }\n}\n\n} // namespace xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/xth/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <functional>\n#include <thread>\n\n#include <tensorpipe/channel/context_impl_boilerplate.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/device.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/queue.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace xth {\n\nclass ChannelImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ChannelImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create();\n\n  explicit ContextImpl(\n      std::unordered_map<Device, std::string> deviceDescriptors);\n\n  std::shared_ptr<Channel> createChannel(\n      std::vector<std::shared_ptr<transport::Connection>> connections,\n      Endpoint endpoint);\n\n  size_t numConnectionsNeeded() const override;\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n  using copy_request_callback_fn = std::function<void(const Error&)>;\n\n  void requestCopy(\n      void* remotePtr,\n      void* localPtr,\n      size_t length,\n      copy_request_callback_fn fn);\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n\n private:\n  OnDemandDeferredExecutor loop_;\n\n  struct CopyRequest {\n    void* remotePtr;\n    void* localPtr;\n    size_t length;\n    copy_request_callback_fn callback;\n  };\n\n  std::thread thread_;\n  Queue<optional<CopyRequest>> requests_;\n\n  // This is atomic because it may be accessed from outside the loop.\n  std::atomic<uint64_t> nextRequestId_{0};\n\n  void 
handleCopyRequests();\n};\n\n} // namespace xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/xth/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/xth/factory.h>\n\n#include <tensorpipe/channel/context_boilerplate.h>\n#include <tensorpipe/channel/xth/channel_impl.h>\n#include <tensorpipe/channel/xth/context_impl.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace xth {\n\nstd::shared_ptr<Context> create() {\n  return std::make_shared<ContextBoilerplate<ContextImpl, ChannelImpl>>();\n}\n\n} // namespace xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/channel/xth/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/channel/context.h>\n\nnamespace tensorpipe {\nnamespace channel {\nnamespace xth {\n\nstd::shared_ptr<Context> create();\n\n} // namespace xth\n} // namespace channel\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/address.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/address.h>\n\n#include <tensorpipe/common/defs.h>\n\nnamespace tensorpipe {\n\nstd::tuple<std::string, std::string> splitSchemeOfURL(const std::string& url) {\n  std::string::size_type endOfScheme = url.find(\"://\");\n  if (endOfScheme == std::string::npos) {\n    TP_THROW_EINVAL() << \"url has no scheme: \" << url;\n  }\n  return std::make_tuple(\n      url.substr(0, endOfScheme), url.substr(endOfScheme + 3));\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/address.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n\nnamespace tensorpipe {\n\nstd::tuple<std::string, std::string> splitSchemeOfURL(const std::string& url);\n\n}\n"
  },
  {
    "path": "tensorpipe/common/allocator.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/allocator.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error_macros.h>\n\nnamespace tensorpipe {\n\nAllocator::Allocator(uint8_t* data, size_t numChunks, size_t chunkSize)\n    : numChunks_(numChunks),\n      chunkSize_(chunkSize),\n      data_(data),\n      chunkAvailable_(numChunks, true) {}\n\nAllocator::~Allocator() {\n  close();\n}\n\nvoid Allocator::alloc(size_t size, TAllocCallback callback) {\n  TP_DCHECK(size <= chunkSize_);\n  pendingAllocations_.push_back(std::move(callback));\n  processAllocations();\n}\n\nsize_t Allocator::getChunkLength() const {\n  return chunkSize_;\n}\n\nvoid Allocator::close() {\n  if (closed_) {\n    return;\n  }\n  closed_ = true;\n  processAllocations();\n}\n\nvoid Allocator::processAllocations() {\n  while (!pendingAllocations_.empty()) {\n    auto& callback = pendingAllocations_.front();\n    if (closed_) {\n      callback(TP_CREATE_ERROR(AllocatorClosedError), nullptr);\n    } else {\n      TChunk ptr = getAvailableChunk();\n      if (!ptr) {\n        break;\n      }\n      callback(Error::kSuccess, std::move(ptr));\n    }\n    pendingAllocations_.pop_front();\n  }\n}\n\nAllocator::TChunk Allocator::getAvailableChunk() {\n  for (size_t curChunk = 0; curChunk < numChunks_; ++curChunk) {\n    if (chunkAvailable_[curChunk]) {\n      chunkAvailable_[curChunk] = false;\n      ++allocatedChunks_;\n      return TChunk(data_ + curChunk * chunkSize_, [this](uint8_t* ptr) {\n        releaseChunk(ptr);\n      });\n    }\n  }\n\n  return nullptr;\n}\n\nvoid Allocator::releaseChunk(uint8_t* ptr) {\n  size_t chunkId = (ptr - data_) / chunkSize_;\n  chunkAvailable_[chunkId] = true;\n  --allocatedChunks_;\n  processAllocations();\n}\n\n} // 
namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/allocator.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n#include <cstdint>\n#include <deque>\n#include <functional>\n#include <memory>\n#include <vector>\n\n#include <tensorpipe/common/error.h>\n\nnamespace tensorpipe {\n\nclass AllocatorClosedError final : public BaseError {\n  std::string what() const override {\n    return \"allocator closed\";\n  }\n};\n\nclass Allocator {\n public:\n  // Note: this is a std::shared_ptr<uint8_t[]> semantically. A shared_ptr with\n  // array type is supported in C++17 and higher.\n  using TChunk = std::shared_ptr<uint8_t>;\n  using TAllocCallback = std::function<void(const Error&, TChunk)>;\n\n  explicit Allocator(uint8_t* data, size_t numChunks, size_t chunkSize);\n\n  ~Allocator();\n\n  void alloc(size_t size, TAllocCallback callback);\n  size_t getChunkLength() const;\n\n  void close();\n\n private:\n  const size_t numChunks_;\n  const size_t chunkSize_;\n  uint8_t* const data_;\n  std::vector<bool> chunkAvailable_;\n  size_t allocatedChunks_{0};\n  std::deque<TAllocCallback> pendingAllocations_;\n  bool closed_{false};\n\n  void processAllocations();\n  TChunk getAvailableChunk();\n  void releaseChunk(uint8_t* ptr);\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/buffer.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n#include <stdexcept>\n#include <type_traits>\n#include <utility>\n\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/common/device.h>\n\nnamespace tensorpipe {\n\nclass Buffer {\n  class AbstractBufferWrapper {\n   public:\n    virtual Device device() const = 0;\n    virtual void copyConstructInto(void* ptr) const = 0;\n    virtual void moveConstructInto(void* ptr) = 0;\n    virtual ~AbstractBufferWrapper() = default;\n  };\n\n  template <typename TBuffer>\n  class BufferWrapper : public AbstractBufferWrapper {\n    static_assert(\n        std::is_trivially_copyable<TBuffer>::value,\n        \"wrapping non-trivially copyable class\");\n\n   public:\n    TBuffer buffer;\n\n    explicit BufferWrapper(TBuffer buffer) : buffer(std::move(buffer)) {}\n\n    Device device() const override {\n      return buffer.getDevice();\n    }\n\n    void copyConstructInto(void* ptr) const override {\n      new (ptr) BufferWrapper(*this);\n    }\n\n    void moveConstructInto(void* ptr) override {\n      new (ptr) BufferWrapper(std::move(*this));\n    }\n  };\n\n public:\n  template <typename TBuffer>\n  /* implicit */ Buffer(TBuffer b) {\n    static_assert(\n        sizeof(BufferWrapper<TBuffer>) <= kStructSize, \"kStructSize too small\");\n    static_assert(\n        alignof(BufferWrapper<TBuffer>) <= kStructAlign,\n        \"kStructAlign too small\");\n    new (&raw_) BufferWrapper<TBuffer>(std::move(b));\n  }\n\n  Buffer() : Buffer(CpuBuffer{}) {}\n\n  Buffer(const Buffer& other) {\n    other.ptr()->copyConstructInto(&raw_);\n  }\n\n  Buffer& operator=(const Buffer& other) {\n    if (this != &other) {\n      ptr()->~AbstractBufferWrapper();\n      other.ptr()->copyConstructInto(&raw_);\n    
}\n    return *this;\n  }\n\n  Buffer(Buffer&& other) noexcept {\n    other.ptr()->moveConstructInto(&raw_);\n  }\n\n  Buffer& operator=(Buffer&& other) {\n    if (this != &other) {\n      ptr()->~AbstractBufferWrapper();\n      other.ptr()->moveConstructInto(&raw_);\n    }\n    return *this;\n  }\n\n  ~Buffer() {\n    ptr()->~AbstractBufferWrapper();\n  }\n\n  template <typename TBuffer>\n  TBuffer& unwrap() {\n    BufferWrapper<TBuffer>* wrapperPtr =\n        dynamic_cast<BufferWrapper<TBuffer>*>(ptr());\n    if (wrapperPtr == nullptr) {\n      throw std::runtime_error(\"Invalid unwrapping of tensorpipe::Buffer\");\n    }\n    return wrapperPtr->buffer;\n  }\n\n  template <typename TBuffer>\n  const TBuffer& unwrap() const {\n    const BufferWrapper<TBuffer>* wrapperPtr =\n        dynamic_cast<const BufferWrapper<TBuffer>*>(ptr());\n    if (wrapperPtr == nullptr) {\n      throw std::runtime_error(\"Invalid unwrapping of tensorpipe::Buffer\");\n    }\n    return wrapperPtr->buffer;\n  }\n\n  Device device() const {\n    return ptr()->device();\n  }\n\n private:\n  static constexpr int kStructSize = 32;\n  static constexpr int kStructAlign = 8;\n  std::aligned_storage<kStructSize, kStructAlign>::type raw_{};\n\n  const AbstractBufferWrapper* ptr() const {\n    // FIXME: Once we go C++17, use std::launder on the returned pointer.\n    return reinterpret_cast<const AbstractBufferWrapper*>(&raw_);\n  }\n\n  AbstractBufferWrapper* ptr() {\n    // FIXME: Once we go C++17, use std::launder on the returned pointer.\n    return reinterpret_cast<AbstractBufferWrapper*>(&raw_);\n  }\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/busy_polling_loop.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <string>\n#include <thread>\n#include <utility>\n\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\n\nclass BusyPollingLoop : public EventLoopDeferredExecutor {\n protected:\n  virtual bool pollOnce() = 0;\n\n  virtual bool readyToClose() = 0;\n\n  void stopBusyPolling() {\n    closed_ = true;\n    // No need to wake up the thread, since it is busy-waiting.\n  }\n\n  void eventLoop() override {\n    while (!closed_ || !readyToClose()) {\n      if (pollOnce()) {\n        // continue\n      } else if (deferredFunctionCount_ > 0) {\n        deferredFunctionCount_ -= runDeferredFunctionsFromEventLoop();\n      } else {\n        std::this_thread::yield();\n      }\n    }\n  }\n\n  void wakeupEventLoopToDeferFunction() override {\n    ++deferredFunctionCount_;\n    // No need to wake up the thread, since it is busy-waiting.\n  }\n\n private:\n  std::atomic<bool> closed_{false};\n\n  std::atomic<int64_t> deferredFunctionCount_{0};\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/callback.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <deque>\n#include <functional>\n#include <memory>\n#include <mutex>\n#include <thread>\n#include <tuple>\n#include <unordered_map>\n\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\n\nnamespace {\n\n// NOTE: This is an incomplete implementation of C++17's `std::apply`.\ntemplate <typename F, typename T, size_t... I>\nauto cbApply(F&& f, T&& t, std::index_sequence<I...> /*unused*/) {\n  return f(std::get<I>(std::forward<T>(t))...);\n}\n\ntemplate <typename F, typename T>\nauto cbApply(F&& f, T&& t) {\n  return cbApply(\n      std::move(f),\n      std::forward<T>(t),\n      std::make_index_sequence<std::tuple_size<T>::value>{});\n}\n\n} // namespace\n\n// A wrapper for a callback that \"burns out\" after it fires and thus needs to be\n// rearmed every time. Invocations that are triggered while the callback is\n// unarmed are stashed and will be delayed until a callback is provided again.\ntemplate <typename... Args>\nclass RearmableCallback {\n  using TFn = std::function<void(Args...)>;\n  using TStoredArgs = std::tuple<typename std::remove_reference<Args>::type...>;\n\n public:\n  void arm(TFn fn) {\n    if (!args_.empty()) {\n      TStoredArgs args{std::move(args_.front())};\n      args_.pop_front();\n      cbApply(std::move(fn), std::move(args));\n    } else {\n      callbacks_.push_back(std::move(fn));\n    }\n  }\n\n  void trigger(Args... 
args) {\n    if (!callbacks_.empty()) {\n      TFn fn{std::move(callbacks_.front())};\n      callbacks_.pop_front();\n      cbApply(std::move(fn), std::tuple<Args...>(std::forward<Args>(args)...));\n    } else {\n      args_.emplace_back(std::forward<Args>(args)...);\n    }\n  }\n\n  // This method is intended for \"flushing\" the callback, for example when an\n  // error condition is reached which means that no more callbacks will be\n  // processed but the current ones still must be honored.\n  void triggerAll(std::function<std::tuple<Args...>()> generator) {\n    while (!callbacks_.empty()) {\n      TFn fn{std::move(callbacks_.front())};\n      callbacks_.pop_front();\n      cbApply(std::move(fn), generator());\n    }\n  }\n\n private:\n  std::deque<TFn> callbacks_;\n  std::deque<TStoredArgs> args_;\n};\n\n// This class provides some boilerplate that is used by the pipe, the listener\n// and others when passing a callback to some lower-level component.\n// It will acquire a shared_ptr to the object (thus preventing the object from\n// being destroyed until the callback has been fired) and in case of error it\n// will deal with it but it will still end up invoking the actual callback.\ntemplate <typename TSubject>\nclass CallbackWrapper {\n public:\n  CallbackWrapper(\n      std::enable_shared_from_this<TSubject>& subject,\n      DeferredExecutor& loop)\n      : subject_(subject), loop_(loop) {}\n\n  template <typename TBoundFn>\n  auto operator()(TBoundFn fn) {\n    return [this, subject{subject_.shared_from_this()}, fn{std::move(fn)}](\n               const Error& error, auto&&... args) mutable {\n      this->entryPoint(\n          std::move(subject),\n          std::move(fn),\n          error,\n          std::forward<decltype(args)>(args)...);\n    };\n  }\n\n private:\n  std::enable_shared_from_this<TSubject>& subject_;\n  DeferredExecutor& loop_;\n\n  template <typename TBoundFn, typename... 
Args>\n  void entryPoint(\n      std::shared_ptr<TSubject> subject,\n      TBoundFn fn,\n      const Error& error,\n      Args&&... args) {\n    // Do *NOT* move subject into the lambda's closure, as the shared_ptr we're\n    // holding may be the last one keeping subject alive, in which case it would\n    // die once the lambda runs, and it might kill the loop in turn too, _while_\n    // the loop's deferToLoop method is running. That's bad. So copy it instead.\n    // FIXME We're copying the args here...\n    loop_.deferToLoop(\n        [this, subject, fn{std::move(fn)}, error{error}, args...]() mutable {\n          entryPointFromLoop(\n              *subject, std::move(fn), error, std::forward<Args>(args)...);\n        });\n  }\n\n  template <typename TBoundFn, typename... Args>\n  void entryPointFromLoop(\n      TSubject& subject,\n      TBoundFn fn,\n      const Error& error,\n      Args&&... args) {\n    TP_DCHECK(loop_.inLoop());\n\n    subject.setError(error);\n    // Proceed regardless of any error: this is why it's called \"eager\".\n    fn(subject, std::forward<Args>(args)...);\n  }\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/cpu_buffer.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/common/device.h>\n\nnamespace tensorpipe {\n\nstruct CpuBuffer {\n  void* ptr{nullptr};\n\n  Device getDevice() const {\n    return Device{kCpuDeviceType, 0};\n  }\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/cuda.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <iomanip>\n#include <ios>\n#include <memory>\n#include <sstream>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include <cuda_runtime.h>\n\n#include <tensorpipe/common/cuda_lib.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/device.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/strings.h>\n\n#define TP_CUDA_CHECK(a)                                                \\\n  do {                                                                  \\\n    cudaError_t error = (a);                                            \\\n    TP_THROW_ASSERT_IF(cudaSuccess != error)                            \\\n        << __TP_EXPAND_OPD(a) << \" \" << cudaGetErrorName(error) << \" (\" \\\n        << cudaGetErrorString(error) << \")\";                            \\\n  } while (false)\n\nnamespace tensorpipe {\n\nclass CudaError final : public BaseError {\n public:\n  explicit CudaError(cudaError_t error) : error_(error) {}\n\n  std::string what() const override {\n    return std::string(cudaGetErrorString(error_));\n  }\n\n private:\n  cudaError_t error_;\n};\n\nclass CudaDeviceGuard {\n public:\n  CudaDeviceGuard() = delete;\n  CudaDeviceGuard(const CudaDeviceGuard&) = delete;\n  CudaDeviceGuard(CudaDeviceGuard&&) = delete;\n  CudaDeviceGuard& operator=(const CudaDeviceGuard&) = delete;\n  CudaDeviceGuard& operator=(CudaDeviceGuard&&) = delete;\n\n  explicit CudaDeviceGuard(int device) {\n    TP_CUDA_CHECK(cudaGetDevice(&device_));\n    TP_CUDA_CHECK(cudaSetDevice(device));\n  }\n\n  ~CudaDeviceGuard() {\n    TP_CUDA_CHECK(cudaSetDevice(device_));\n  }\n\n private:\n  int device_;\n};\n\nclass CudaEvent {\n public:\n  CudaEvent() = delete;\n  CudaEvent(const 
CudaEvent&) = delete;\n  CudaEvent(CudaEvent&&) = delete;\n  CudaEvent& operator=(const CudaEvent&) = delete;\n  CudaEvent& operator=(CudaEvent&&) = delete;\n\n  explicit CudaEvent(int device, bool interprocess = false)\n      : deviceIdx_(device) {\n    CudaDeviceGuard guard(deviceIdx_);\n    int flags = cudaEventDisableTiming;\n    if (interprocess) {\n      flags |= cudaEventInterprocess;\n    }\n    TP_CUDA_CHECK(cudaEventCreateWithFlags(&ev_, flags));\n  }\n\n  explicit CudaEvent(int device, cudaIpcEventHandle_t handle)\n      : deviceIdx_(device) {\n    // It could crash if we don't set device when creating events from handles\n    CudaDeviceGuard guard(deviceIdx_);\n    TP_CUDA_CHECK(cudaIpcOpenEventHandle(&ev_, handle));\n  }\n\n  void record(cudaStream_t stream) {\n    CudaDeviceGuard guard(deviceIdx_);\n    TP_CUDA_CHECK(cudaEventRecord(ev_, stream));\n  }\n\n  void wait(cudaStream_t stream, int device) {\n    CudaDeviceGuard guard(device);\n    TP_CUDA_CHECK(cudaStreamWaitEvent(stream, ev_, 0));\n  }\n\n  bool query() const {\n    CudaDeviceGuard guard(deviceIdx_);\n    cudaError_t res = cudaEventQuery(ev_);\n    if (res == cudaErrorNotReady) {\n      return false;\n    }\n    TP_CUDA_CHECK(res);\n    return true;\n  }\n\n  cudaEvent_t raw() {\n    return ev_;\n  }\n\n  cudaIpcEventHandle_t getIpcHandle() const {\n    CudaDeviceGuard guard(deviceIdx_);\n    cudaIpcEventHandle_t handle;\n    TP_CUDA_CHECK(cudaIpcGetEventHandle(&handle, ev_));\n    return handle;\n  }\n\n  std::string serializedHandle() {\n    cudaIpcEventHandle_t handle = getIpcHandle();\n    return std::string(reinterpret_cast<const char*>(&handle), sizeof(handle));\n  }\n\n  ~CudaEvent() {\n    CudaDeviceGuard guard(deviceIdx_);\n    TP_CUDA_CHECK(cudaEventDestroy(ev_));\n  }\n\n private:\n  cudaEvent_t ev_;\n  int deviceIdx_;\n};\n\ninline int cudaDeviceForPointer(const CudaLib& cudaLib, const void* ptr) {\n  // When calling cudaSetDevice(0) when device 0 hasn't been initialized yet\n  
// the CUDA runtime sets the current context of the CUDA driver to what's\n  // apparently an invalid non-null value. This causes cudaPointerGetAttributes\n  // to misbehave (possibly other functions too, but this is the only function\n  // that we call outside of a device guard). In fact, device guards are likely\n  // the reason we call cudaSetDevice(0) at all, because at destruction they\n  // reset the current device to the value it had before construction, and that\n  // will be zero if no other device guard was active at that point.\n  // The ugly workaround is to manually undo the runtime's errors, by clearing\n  // the driver's current context. In a sense, by creating a \"reverse\" guard.\n  CUcontext ctx;\n  TP_CUDA_DRIVER_CHECK(cudaLib, cudaLib.ctxGetCurrent(&ctx));\n  TP_CUDA_DRIVER_CHECK(cudaLib, cudaLib.ctxSetCurrent(nullptr));\n\n  int deviceIdx;\n  TP_CUDA_DRIVER_CHECK(\n      cudaLib,\n      cudaLib.pointerGetAttribute(\n          &deviceIdx,\n          CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL,\n          reinterpret_cast<CUdeviceptr>(ptr)));\n\n  TP_CUDA_DRIVER_CHECK(cudaLib, cudaLib.ctxSetCurrent(ctx));\n  return deviceIdx;\n}\n\nclass CudaPinnedMemoryDeleter {\n public:\n  explicit CudaPinnedMemoryDeleter(int deviceIdx) : deviceIdx_(deviceIdx) {}\n\n  void operator()(uint8_t* ptr) {\n    CudaDeviceGuard guard(deviceIdx_);\n    TP_CUDA_CHECK(cudaFreeHost(ptr));\n  }\n\n private:\n  const int deviceIdx_;\n};\n\nusing CudaPinnedBuffer = std::unique_ptr<uint8_t[], CudaPinnedMemoryDeleter>;\n\ninline CudaPinnedBuffer makeCudaPinnedBuffer(size_t length, int deviceIdx) {\n  CudaDeviceGuard guard(deviceIdx);\n  uint8_t* ptr;\n  TP_CUDA_CHECK(cudaMallocHost(&ptr, length));\n  return CudaPinnedBuffer(ptr, CudaPinnedMemoryDeleter(deviceIdx));\n}\n\nclass CudaDeviceBuffer {\n public:\n  CudaDeviceBuffer() = default;\n\n  CudaDeviceBuffer(size_t length, int deviceIdx) {\n    CudaDeviceGuard guard(deviceIdx);\n    uint8_t* ptr;\n    TP_CUDA_CHECK(cudaMalloc(&ptr, 
length));\n    ptr_ = {ptr, Deleter{deviceIdx}};\n  }\n\n  uint8_t* ptr() const {\n    return ptr_.get();\n  }\n\n  int deviceIdx() const {\n    return ptr_.get_deleter().deviceIdx;\n  }\n\n  void reset() {\n    ptr_.reset();\n  }\n\n  cudaIpcMemHandle_t getIpcHandle() const {\n    CudaDeviceGuard guard(deviceIdx());\n    cudaIpcMemHandle_t handle;\n    TP_CUDA_CHECK(cudaIpcGetMemHandle(&handle, ptr_.get()));\n    return handle;\n  }\n\n private:\n  struct Deleter {\n    int deviceIdx;\n\n    void operator()(uint8_t* ptr) {\n      CudaDeviceGuard guard(deviceIdx);\n      TP_CUDA_CHECK(cudaFree(ptr));\n    }\n  };\n\n  std::unique_ptr<uint8_t[], Deleter> ptr_;\n};\n\nclass CudaIpcBuffer {\n public:\n  CudaIpcBuffer() = default;\n\n  CudaIpcBuffer(int deviceIdx, const cudaIpcMemHandle_t& handle) {\n    CudaDeviceGuard guard(deviceIdx);\n    void* ptr;\n    TP_CUDA_CHECK(\n        cudaIpcOpenMemHandle(&ptr, handle, cudaIpcMemLazyEnablePeerAccess));\n    ptr_ = {reinterpret_cast<uint8_t*>(ptr), Deleter{deviceIdx}};\n  }\n\n  uint8_t* ptr() const {\n    return ptr_.get();\n  }\n\n  int deviceIdx() const {\n    return ptr_.get_deleter().deviceIdx;\n  }\n\n  void reset() {\n    ptr_.reset();\n  }\n\n private:\n  struct Deleter {\n    int deviceIdx;\n\n    void operator()(uint8_t* ptr) {\n      CudaDeviceGuard guard(deviceIdx);\n      TP_CUDA_CHECK(cudaIpcCloseMemHandle(ptr));\n    }\n  };\n\n  std::unique_ptr<uint8_t[], Deleter> ptr_;\n};\n\ninline std::string getUuidOfDevice(const CudaLib& cudaLib, int deviceIdx) {\n  CUdevice device;\n  TP_CUDA_DRIVER_CHECK(cudaLib, cudaLib.deviceGet(&device, deviceIdx));\n\n  CUuuid uuid;\n  TP_CUDA_DRIVER_CHECK(cudaLib, cudaLib.deviceGetUuid(&uuid, device));\n\n  // The CUDA driver and NVML choose two different formats for UUIDs, hence we\n  // need to reconcile them. 
We do so using the most human readable format, that\n  // is \"aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\" (8-4-4-4-12).\n  std::ostringstream uuidSs;\n  uuidSs << std::hex << std::setfill('0');\n  for (int j = 0; j < 16; ++j) {\n    // The bitmask is required otherwise a negative value will get promoted to\n    // (signed) int with sign extension if char is signed.\n    uuidSs << std::setw(2) << (uuid.bytes[j] & 0xff);\n    if (j == 3 || j == 5 || j == 7 || j == 9) {\n      uuidSs << '-';\n    }\n  }\n\n  std::string uuidStr = uuidSs.str();\n  TP_THROW_ASSERT_IF(!isValidUuid(uuidStr))\n      << \"Couldn't obtain valid UUID for GPU #\" << deviceIdx\n      << \" from CUDA driver. Got: \" << uuidStr;\n\n  return uuidStr;\n}\n\ninline std::vector<std::string> getUuidsOfVisibleDevices(\n    const CudaLib& cudaLib) {\n  int deviceCount;\n  TP_CUDA_DRIVER_CHECK(cudaLib, cudaLib.deviceGetCount(&deviceCount));\n\n  std::vector<std::string> result(deviceCount);\n  for (int devIdx = 0; devIdx < deviceCount; ++devIdx) {\n    result[devIdx] = getUuidOfDevice(cudaLib, devIdx);\n  }\n\n  return result;\n}\n\ninline std::vector<Device> getCudaDevices(const CudaLib& cudaLib) {\n  int deviceCount;\n  TP_CUDA_DRIVER_CHECK(cudaLib, cudaLib.deviceGetCount(&deviceCount));\n  std::vector<Device> result(deviceCount);\n  for (int devIdx = 0; devIdx < deviceCount; ++devIdx) {\n    result[devIdx] = Device{kCudaDeviceType, devIdx};\n  }\n\n  return result;\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/cuda_buffer.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/cuda_buffer.h>\n\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/defs.h>\n\nnamespace tensorpipe {\n\nDevice CudaBuffer::getDevice() const {\n  static CudaLib cudaLib = []() {\n    Error error;\n    CudaLib lib;\n    std::tie(error, lib) = CudaLib::create();\n    TP_THROW_ASSERT_IF(error)\n        << \"Cannot get CUDA device for pointer because libcuda could not be loaded: \"\n        << error.what();\n    return lib;\n  }();\n\n  return Device{kCudaDeviceType, cudaDeviceForPointer(cudaLib, ptr)};\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/cuda_buffer.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cuda_runtime.h>\n\n#include <tensorpipe/common/device.h>\n\nnamespace tensorpipe {\n\nstruct CudaBuffer {\n  void* ptr{nullptr};\n  cudaStream_t stream{cudaStreamDefault};\n\n  Device getDevice() const;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/cuda_lib.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <cuda.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/dl.h>\n\n#define TP_CUDA_DRIVER_CHECK(cuda_lib, a)                                 \\\n  do {                                                                    \\\n    CUresult error = (a);                                                 \\\n    if (error != CUDA_SUCCESS) {                                          \\\n      CUresult res;                                                       \\\n      const char* errorName;                                              \\\n      const char* errorStr;                                               \\\n      res = cuda_lib.getErrorName(error, &errorName);                     \\\n      TP_THROW_ASSERT_IF(res != CUDA_SUCCESS);                            \\\n      res = cuda_lib.getErrorString(error, &errorStr);                    \\\n      TP_THROW_ASSERT_IF(res != CUDA_SUCCESS);                            \\\n      TP_THROW_ASSERT() << __TP_EXPAND_OPD(a) << \" \" << errorName << \" (\" \\\n                        << errorStr << \")\";                               \\\n    }                                                                     \\\n  } while (false)\n\nnamespace tensorpipe {\n\nclass NoDevicesError final : public BaseError {\n public:\n  std::string what() const override {\n    return \"The CUDA driver failed to init because it didn't find any device\";\n  }\n};\n\n// Master list of all symbols we care about from libcuda.\n\n#define TP_FORALL_CUDA_SYMBOLS(_)                               \\\n  _(ctxGetCurrent, cuCtxGetCurrent, (CUcontext*))               \\\n  _(ctxSetCurrent, cuCtxSetCurrent, (CUcontext))                \\\n  _(deviceGet, 
cuDeviceGet, (CUdevice*, int))                   \\\n  _(deviceGetCount, cuDeviceGetCount, (int*))                   \\\n  _(deviceGetUuid, cuDeviceGetUuid, (CUuuid*, CUdevice))        \\\n  _(getErrorName, cuGetErrorName, (CUresult, const char**))     \\\n  _(getErrorString, cuGetErrorString, (CUresult, const char**)) \\\n  _(init, cuInit, (unsigned int))                               \\\n  _(memGetAddressRange_v2,                                      \\\n    cuMemGetAddressRange_v2,                                    \\\n    (CUdeviceptr*, size_t*, CUdeviceptr))                       \\\n  _(pointerGetAttribute,                                        \\\n    cuPointerGetAttribute,                                      \\\n    (void*, CUpointer_attribute, CUdeviceptr))\n\n// Wrapper for libcuda.\n\nclass CudaLib {\n private:\n  explicit CudaLib(DynamicLibraryHandle dlhandle)\n      : dlhandle_(std::move(dlhandle)) {}\n\n  DynamicLibraryHandle dlhandle_;\n\n#define TP_DECLARE_FIELD(method_name, function_name, args_types) \\\n  CUresult(*function_name##_ptr_) args_types = nullptr;\n  TP_FORALL_CUDA_SYMBOLS(TP_DECLARE_FIELD)\n#undef TP_DECLARE_FIELD\n\n public:\n  CudaLib() = default;\n\n#define TP_FORWARD_CALL(method_name, function_name, args_types)  \\\n  template <typename... Args>                                    \\\n  auto method_name(Args&&... 
args) const {                       \\\n    return (*function_name##_ptr_)(std::forward<Args>(args)...); \\\n  }\n  TP_FORALL_CUDA_SYMBOLS(TP_FORWARD_CALL)\n#undef TP_FORWARD_CALL\n\n  static std::tuple<Error, CudaLib> create() {\n    Error error;\n    DynamicLibraryHandle dlhandle;\n    // To keep things \"neat\" and contained, we open in \"local\" mode (as\n    // opposed to global) so that the cuda symbols can only be resolved\n    // through this handle and are not exposed (a.k.a., \"leaked\") to other\n    // shared objects.\n    std::tie(error, dlhandle) =\n        DynamicLibraryHandle::create(\"libcuda.so.1\", RTLD_LOCAL | RTLD_LAZY);\n    if (error) {\n      return std::make_tuple(std::move(error), CudaLib());\n    }\n    // Log at level 9 as we can't know whether this will be used in a transport\n    // or channel, thus err on the side of this being as low-level as possible\n    // because we don't expect this to be of interest that often.\n    TP_VLOG(9) << [&]() -> std::string {\n      std::string filename;\n      std::tie(error, filename) = dlhandle.getFilename();\n      if (error) {\n        return \"Couldn't determine location of shared library libcuda.so.1: \" +\n            error.what();\n      }\n      return \"Found shared library libcuda.so.1 at \" + filename;\n    }();\n    CudaLib lib(std::move(dlhandle));\n#define TP_LOAD_SYMBOL(method_name, function_name, args_types)       \\\n  {                                                                  \\\n    void* ptr;                                                       \\\n    std::tie(error, ptr) = lib.dlhandle_.loadSymbol(#function_name); \\\n    if (error) {                                                     \\\n      return std::make_tuple(std::move(error), CudaLib());           \\\n    }                                                                \\\n    TP_THROW_ASSERT_IF(ptr == nullptr);                              \\\n    lib.function_name##_ptr_ =                                    
   \\\n        reinterpret_cast<decltype(function_name##_ptr_)>(ptr);       \\\n  }\n    TP_FORALL_CUDA_SYMBOLS(TP_LOAD_SYMBOL)\n#undef TP_LOAD_SYMBOL\n    CUresult result = lib.init(0);\n    // If the driver doesn't find any devices it fails to init (beats me why)\n    // but we must support this case, by disabling the channels, rather than\n    // throwing. Hence we treat it as if we couldn't find the driver.\n    if (result == CUDA_ERROR_NO_DEVICE) {\n      return std::make_tuple(TP_CREATE_ERROR(NoDevicesError), CudaLib());\n    }\n    TP_CUDA_DRIVER_CHECK(lib, result);\n    return std::make_tuple(Error::kSuccess, std::move(lib));\n  }\n\n  CUresult memGetAddressRange(\n      CUdeviceptr* pbase,\n      size_t* psize,\n      CUdeviceptr dptr) const {\n    // NOTE: We are forwarding to cuMemGetAddressRange_v2() directly, because\n    // the name cuMemGetAddressRange is #defined to its _v2 variant in cuda.h.\n    // Calling the actual cuMemGetAddressRange() function here would lead to a\n    // CUDA_ERROR_INVALID_CONTEXT.\n    return memGetAddressRange_v2(pbase, psize, dptr);\n  }\n};\n\n#undef TP_FORALL_CUDA_SYMBOLS\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/cuda_loop.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/cuda_loop.h>\n\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\n\nnamespace {\n\nstruct CudaCallback {\n  CudaLoop& loop;\n  std::function<void(const Error&)> callback;\n\n  CudaCallback(CudaLoop& loop, std::function<void(const Error&)> callback)\n      : loop(loop), callback(std::move(callback)) {}\n};\n\nclass CudaLoopClosedError final : public BaseError {\n  std::string what() const override {\n    return \"CUDA loop already closed\";\n  }\n};\n\n} // namespace\n\nCudaLoop::CudaLoop() {\n  thread_ = std::thread([this]() {\n    setThreadName(\"TP_CUDA_callback_loop\");\n    processCallbacks();\n  });\n}\n\nCudaLoop::~CudaLoop() {\n  join();\n}\n\nvoid CudaLoop::join() {\n  close();\n\n  if (!joined_.exchange(true)) {\n    thread_.join();\n  }\n}\n\nvoid CudaLoop::close() {\n  std::unique_lock<std::mutex> lock(mutex_);\n  if (closed_) {\n    return;\n  }\n  closed_ = true;\n  cv_.notify_all();\n}\n\nvoid CudaLoop::processCallbacks() {\n  for (;;) {\n    std::deque<Operation> operations;\n    {\n      std::unique_lock<std::mutex> lock(mutex_);\n\n      if (operations_.empty()) {\n        if (closed_ && pendingOperations_ == 0) {\n          break;\n        } else {\n          cv_.wait(lock);\n        }\n      }\n\n      std::swap(operations, operations_);\n      pendingOperations_ -= operations.size();\n    }\n\n    for (auto& op : operations) {\n      op.callback(op.error);\n    }\n  }\n}\n\nvoid CudaLoop::addCallback(\n    int device,\n    cudaStream_t stream,\n    std::function<void(const Error&)> callback) {\n  {\n    std::unique_lock<std::mutex> lock(mutex_);\n    if (closed_) {\n      callback(TP_CREATE_ERROR(CudaLoopClosedError));\n      
return;\n    }\n    ++pendingOperations_;\n  }\n\n  auto cudaCallback =\n      std::make_unique<CudaCallback>(*this, std::move(callback));\n  CudaDeviceGuard guard(device);\n  TP_CUDA_CHECK(cudaStreamAddCallback(\n      stream, runCudaCallback, cudaCallback.release(), 0));\n}\n\nvoid CUDART_CB CudaLoop::runCudaCallback(\n    cudaStream_t /* unused */,\n    cudaError_t cudaError,\n    void* callbackPtr) {\n  std::unique_ptr<CudaCallback> cudaCallback(\n      reinterpret_cast<CudaCallback*>(callbackPtr));\n  CudaLoop& loop = cudaCallback->loop;\n  {\n    std::unique_lock<std::mutex> lock(loop.mutex_);\n    auto error = Error::kSuccess;\n    if (cudaError != cudaSuccess) {\n      error = TP_CREATE_ERROR(CudaError, cudaError);\n    }\n    loop.operations_.push_back(\n        {std::move(cudaCallback->callback), std::move(error)});\n    loop.cv_.notify_all();\n  }\n  cudaCallback.reset();\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/cuda_loop.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <condition_variable>\n#include <deque>\n#include <functional>\n#include <list>\n#include <mutex>\n#include <thread>\n\n#include <cuda_runtime.h>\n\n#include <tensorpipe/common/error_macros.h>\n\nnamespace tensorpipe {\n\nclass CudaLoop {\n  struct Operation {\n    std::function<void(const Error&)> callback;\n    Error error;\n  };\n\n public:\n  CudaLoop();\n\n  ~CudaLoop();\n\n  void join();\n  void close();\n\n  void addCallback(\n      int device,\n      cudaStream_t stream,\n      std::function<void(const Error&)> callback);\n\n private:\n  std::thread thread_;\n  std::deque<Operation> operations_;\n  std::mutex mutex_;\n  std::condition_variable cv_;\n  uint64_t pendingOperations_{0};\n\n  bool closed_{false};\n  std::atomic<bool> joined_{false};\n\n  void processCallbacks();\n\n  // Proxy static method for cudaStreamAddCallback(), which does not accept\n  // lambdas.\n  static void CUDART_CB runCudaCallback(\n      cudaStream_t stream,\n      cudaError_t cudaError,\n      void* callbackPtr);\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/deferred_executor.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <deque>\n#include <exception>\n#include <functional>\n#include <future>\n#include <memory>\n#include <mutex>\n#include <string>\n#include <thread>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\n\n// Dealing with thread-safety using per-object mutexes is prone to deadlocks\n// because of reentrant calls (both \"upward\", when invoking a callback that\n// calls back into a method of the object, and \"downward\", when passing a\n// callback to an operation of another object that calls it inline) and lock\n// inversions (object A calling a method of object B and attempting to acquire\n// its lock, with the reverse happening at the same time). Using a \"loop\" model,\n// where operations aren't called inlined and piled up on the stack but instead\n// deferred to a later iteration of the loop, solves many of these issues. This\n// abstract interface defines the essential methods we need such event loops to\n// provide.\nclass DeferredExecutor {\n public:\n  using TTask = std::function<void()>;\n\n  virtual void deferToLoop(TTask fn) = 0;\n\n  virtual bool inLoop() const = 0;\n\n  // Prefer using deferToLoop over runInLoop when you don't need to wait for the\n  // result.\n  template <typename F>\n  void runInLoop(F&& fn) {\n    // When called from the event loop thread itself (e.g., from a callback),\n    // deferring would cause a deadlock because the given callable can only be\n    // run when the loop is allowed to proceed. On the other hand, it means it\n    // is thread-safe to run it immediately. 
The danger here however is that it\n    // can lead to an inconsistent order between operations run from the event\n    // loop, from outside of it, and deferred.\n    if (inLoop()) {\n      fn();\n    } else {\n      // Must use a copyable wrapper around std::promise because\n      // we use it from a std::function which must be copyable.\n      auto promise = std::make_shared<std::promise<void>>();\n      auto future = promise->get_future();\n      // Marked as mutable because the fn might hold some state (e.g., the\n      // closure of a lambda) which it might want to modify.\n      deferToLoop([promise, fn{std::forward<F>(fn)}]() mutable {\n        try {\n          fn();\n          promise->set_value();\n        } catch (...) {\n          promise->set_exception(std::current_exception());\n        }\n      });\n      future.get();\n    }\n  }\n\n  virtual ~DeferredExecutor() = default;\n};\n\n// Transports typically have their own thread they can use as deferred executors\n// but many objects (like pipes) don't naturally own threads and introducing\n// them would also mean introducing latency costs due to context switching.\n// In order to give these objects a loop they can use to defer their operations\n// to, we can have them temporarily hijack the calling thread and repurpose it\n// to run an ephemeral loop on which to run the original task and all the ones\n// that a task running on the loop chooses to defer to a later iteration of the\n// loop, recursively. Once all these tasks have been completed, the makeshift\n// loop is dismantled and control of the thread is returned to the caller.\n// FIXME Rename this to OnDemandDeferredExecutor?\nclass OnDemandDeferredExecutor : public DeferredExecutor {\n public:\n  bool inLoop() const override {\n    // If the current thread is already holding the lock (i.e., it's already in\n    // this function somewhere higher up in the stack) then this check won't\n    // race and we will detect it correctly. 
If this is not the case, then this\n    // check may race with another thread, but that's nothing to worry about\n    // because in either case the outcome will be negative.\n    return currentLoop_ == std::this_thread::get_id();\n  }\n\n  void deferToLoop(TTask fn) override {\n    {\n      std::unique_lock<std::mutex> lock(mutex_);\n      pendingTasks_.push_back(std::move(fn));\n      if (currentLoop_ != std::thread::id()) {\n        return;\n      }\n      currentLoop_ = std::this_thread::get_id();\n    }\n\n    while (true) {\n      TTask task;\n      {\n        std::unique_lock<std::mutex> lock(mutex_);\n        if (pendingTasks_.empty()) {\n          currentLoop_ = std::thread::id();\n          return;\n        }\n        task = std::move(pendingTasks_.front());\n        pendingTasks_.pop_front();\n      }\n      task();\n    }\n  }\n\n private:\n  std::mutex mutex_;\n  std::atomic<std::thread::id> currentLoop_{std::thread::id()};\n  std::deque<TTask> pendingTasks_;\n};\n\nclass EventLoopDeferredExecutor : public virtual DeferredExecutor {\n public:\n  void deferToLoop(TTask fn) override {\n    {\n      std::unique_lock<std::mutex> lock(mutex_);\n      if (likely(isThreadConsumingDeferredFunctions_)) {\n        fns_.push_back(std::move(fn));\n        wakeupEventLoopToDeferFunction();\n        return;\n      }\n    }\n    // Must call it without holding the lock, as it could cause a reentrant\n    // call.\n    onDemandLoop_.deferToLoop(std::move(fn));\n  }\n\n  inline bool inLoop() const override {\n    {\n      std::unique_lock<std::mutex> lock(mutex_);\n      if (likely(isThreadConsumingDeferredFunctions_)) {\n        return std::this_thread::get_id() == thread_.get_id();\n      }\n    }\n    return onDemandLoop_.inLoop();\n  }\n\n protected:\n  // This is the actual long-running event loop, which is implemented by\n  // subclasses and called inside the thread owned by this parent class.\n  virtual void eventLoop() = 0;\n\n  // This is called after the event 
loop terminated, still within the thread\n  // that used to run that event loop. It will be called after this class has\n  // transitioned control to the on-demand deferred executor. It thus allows to\n  // clean up any resources without worrying about new work coming in.\n  virtual void cleanUpLoop() {}\n\n  // This function is called by the parent class when a function is deferred to\n  // it, and must be implemented by subclasses, which are required to have their\n  // event loop call runDeferredFunctionsFromEventLoop as soon as possible. This\n  // function is guaranteed to be called once per function deferral (in case\n  // subclasses want to keep count).\n  virtual void wakeupEventLoopToDeferFunction() = 0;\n\n  // Called by subclasses to have the parent class start the thread. We cannot\n  // implicitly call this in the parent class's constructor because it could\n  // lead to a race condition between the event loop (run by the thread) and the\n  // subclass's constructor (which is executed after the parent class's one).\n  // Hence this method should be invoked at the end of the subclass constructor.\n  void startThread(std::string threadName) {\n    // FIXME Once we've fixed the viability (by having a factory function return\n    // a nullptr, instead of having a method on the context), remove this, and\n    // instead add a safety check in deferToLoop that ensures that within the\n    // isThreadConsumingDeferredFunctions_ branch the thread is joinable, i.e.,\n    // up and still running.\n    {\n      std::unique_lock<std::mutex> lock(mutex_);\n      TP_DCHECK(!isThreadConsumingDeferredFunctions_);\n      TP_DCHECK(!thread_.joinable());\n      TP_DCHECK(fns_.empty());\n      isThreadConsumingDeferredFunctions_ = true;\n    }\n    thread_ = std::thread(\n        &EventLoopDeferredExecutor::loop, this, std::move(threadName));\n  }\n\n  // This is basically the reverse operation of the above, and is needed for the\n  // same (reversed) reason. 
Note that this only waits for the thread to finish:\n  // the subclass must have its own way of telling its event loop to stop and\n  // return control.\n  void joinThread() {\n    thread_.join();\n  }\n\n  // Must be called by the subclass after it was woken up. Even if multiple\n  // functions were deferred, this method only needs to be called once. However,\n  // care must be taken to avoid races between this call and new wakeups. This\n  // method also returns the number of functions it executed, in case the\n  // subclass is keeping count.\n  size_t runDeferredFunctionsFromEventLoop() {\n    decltype(fns_) fns;\n\n    {\n      std::unique_lock<std::mutex> lock(mutex_);\n      std::swap(fns, fns_);\n    }\n\n    for (auto& fn : fns) {\n      fn();\n    }\n\n    return fns.size();\n  }\n\n private:\n  void loop(std::string threadName) {\n    setThreadName(std::move(threadName));\n\n    eventLoop();\n\n    // The loop is winding down and \"handing over\" control to the on demand\n    // loop. But it can only do so safely once there are no pending deferred\n    // functions, as otherwise those may risk never being executed.\n    while (true) {\n      decltype(fns_) fns;\n\n      {\n        std::unique_lock<std::mutex> lock(mutex_);\n        if (fns_.empty()) {\n          isThreadConsumingDeferredFunctions_ = false;\n          break;\n        }\n        std::swap(fns, fns_);\n      }\n\n      for (auto& fn : fns) {\n        fn();\n      }\n    }\n\n    cleanUpLoop();\n  }\n\n  std::thread thread_;\n\n  // Whether the thread is taking care of running the deferred functions\n  //\n  // This is part of what can only be described as a hack. Sometimes, even when\n  // using the API as intended, objects try to defer tasks to the loop after\n  // that loop has been closed and joined. Since those tasks may be lambdas that\n  // captured shared_ptrs to the objects in their closures, this may lead to a\n  // reference cycle and thus a leak. 
Our hack is to have this flag to record\n  // when we can no longer defer tasks to the loop and in that case we just run\n  // those tasks inline. In order to keep ensuring the single-threadedness\n  // assumption of our model (which is what we rely on to be safe from race\n  // conditions) we use an on-demand loop. This flag starts as false as in some\n  // cases (like non-viable transports) the thread may never be started and thus\n  // we want the on-demand loop to be engaged from the beginning.\n  bool isThreadConsumingDeferredFunctions_{false};\n  OnDemandDeferredExecutor onDemandLoop_;\n\n  // Mutex to guard the deferring and the running of functions.\n  mutable std::mutex mutex_;\n\n  // List of deferred functions to run when the loop is ready.\n  std::vector<std::function<void()>> fns_;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/defs.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sys/time.h>\n#include <sys/types.h>\n#include <unistd.h>\n\n#include <cstring>\n#include <ctime>\n#include <functional>\n#include <iomanip>\n#include <iostream>\n#include <limits>\n#include <sstream>\n#include <string>\n#include <system_error>\n\n// Branch hint macros. C++20 will include them as part of language.\n#define likely(x) __builtin_expect((x) ? 1 : 0, 1)\n#define unlikely(x) __builtin_expect((x) ? 1 : 0, 0)\n\n/// Auxiliary class to build an exception, fill up its what() message and throw\n/// in a single line. Usually used as an r-value so that the destructor is\n/// called at the end of the line that created it, throwing the desired\n/// exception. (See TP_THROW).\nnamespace tensorpipe {\ntemplate <class TException>\nclass ExceptionThrower final {\n public:\n  template <class... TArgs>\n  ExceptionThrower(TArgs&&... nonWhat) {\n    exBuilder_ = [&](const std::string& what) {\n      return TException(std::move(nonWhat)..., what);\n    };\n  }\n\n  // Throw exception on destructor, when l-value instance goes out of scope\n  // and stream has been written. 
Use noexcept(false) to inform the compiler\n  // that it's ok to throw in destructor.\n  ~ExceptionThrower() noexcept(false) {\n    throw exBuilder_(oss_.str() + \"\\\"\");\n  }\n\n  std::ostream& getStream() {\n    return oss_;\n  }\n\n protected:\n  std::function<TException(const std::string&)> exBuilder_;\n  std::ostringstream oss_;\n};\n} // namespace tensorpipe\n\n//\n// Macros to throw commonly used exceptions.\n//\n#define TP_STRINGIFY(s) #s\n#define TP_EXPAND_TO_STR(s) TP_STRINGIFY(s)\n\n// Strip all leading components up to the *last* occurrence of \"tensorpipe/\".\n// This removes all the system-specific prefixes added by the compiler.\n#define TP_TRIM_FILENAME(s)                                         \\\n  [](const char* filename) -> const char* {                         \\\n    while (true) {                                                  \\\n      const char* match = std::strstr(filename + 1, \"tensorpipe/\"); \\\n      if (match == nullptr) {                                       \\\n        break;                                                      \\\n      }                                                             \\\n      filename = match;                                             \\\n    }                                                               \\\n    return filename;                                                \\\n  }(s)\n\n#define TP_LOG_LOC \\\n  TP_TRIM_FILENAME(__FILE__) << \":\" << TP_EXPAND_TO_STR(__LINE__)\n#define TP_LOG_PREFFIX \"In \" << __func__ << \" at \" << TP_LOG_LOC\n\n#define TP_THROW(ex_type, ...)                                     
\\\n  ::tensorpipe::ExceptionThrower<ex_type>(__VA_ARGS__).getStream() \\\n      << TP_LOG_PREFFIX << \" \\\"\"\n\n#define TP_THROW_EINVAL() TP_THROW(std::invalid_argument)\n\n#define TP_THROW_SYSTEM(err) \\\n  TP_THROW(std::system_error, err, std::system_category())\n#define TP_THROW_SYSTEM_IF(cond, err) \\\n  if (unlikely(cond))                 \\\n  TP_THROW_SYSTEM(err)\n\n#define TP_THROW_SYSTEM_CODE(err) TP_THROW(std::system_error, err)\n#define TP_THROW_SYSTEM_CODE_IF(cond, err) \\\n  if (unlikely(cond))                      \\\n  TP_THROW_SYSTEM_CODE(err) << TP_STRINGIFY(cond)\n\n#define TP_THROW_ASSERT() TP_THROW(std::runtime_error)\n#define TP_THROW_ASSERT_IF(cond) \\\n  if (unlikely(cond))            \\\n  TP_THROW_ASSERT() << TP_STRINGIFY(cond)\n\n// Conditional throwing exception\n#define TP_THROW_IF_NULLPTR(ptr) \\\n  if (unlikely(ptr == nullptr))  \\\n  TP_THROW_EINVAL() << TP_STRINGIFY(ptr) << \" has nullptr value\"\n\n// Safe-cast to std::error_code\nnamespace tensorpipe {\ninline std::error_code toErrorCode(ssize_t e) {\n  if (unlikely(e <= 0)) {\n    TP_THROW_EINVAL() << \"Error not a positive number. \"\n                      << \"Is this value really an error?\";\n  } else if (unlikely(e > std::numeric_limits<int>::max())) {\n    TP_THROW_EINVAL() << \"Error out of range. Is this really an error?\";\n  }\n  return {static_cast<int>(e), std::system_category()};\n}\n} // namespace tensorpipe\n\n//\n// Simple logging to stderr. 
This macros can be replaced if a more\n// sophisticated logging is used in the future.\n// Currently, tensorpipe is meant be used as shared library and to use\n// exceptions for error handling, so the need for logging in\n// the library is reduced.\n//\nnamespace tensorpipe {\nclass LogEntry final {\n public:\n  explicit LogEntry(char type) {\n    oss_ << type;\n\n    // In C++17 use std::timespec.\n    struct timeval tv;\n    // In C++17 use std::timespec_get.\n    gettimeofday(&tv, nullptr);\n    struct std::tm tm;\n    // Need to use localtime_r as std::localtime may not be thread-safe.\n    localtime_r(&tv.tv_sec, &tm);\n    oss_ << std::setfill('0') << std::setw(2) << 1 + tm.tm_mon << std::setw(2)\n         << tm.tm_mday << ' ' << std::setw(2) << tm.tm_hour << ':'\n         << std::setw(2) << tm.tm_min << ':' << std::setw(2) << tm.tm_sec << '.'\n         << std::setw(6) << tv.tv_usec;\n\n    // The glog format uses the thread ID but it's painful to get (there is a\n    // gettid syscall, but it's not exposed in glibc) so we use the PID instead.\n    oss_ << ' ' << std::setfill(' ') << std::setw(5) << getpid();\n  }\n\n  ~LogEntry() noexcept {\n    // Multiple threads or processes writing to the same log (e.g., stderr)\n    // might lead to interleaved text and thus garbled output. 
It seems that a\n    // single write syscall is \"rather\" atomic so instead of issuing a separate\n    // write for the trailing newline we append it to the message and write them\n    // together.\n    oss_ << std::endl;\n    std::cerr << oss_.str();\n  }\n\n  std::ostream& getStream() {\n    return oss_;\n  }\n\n protected:\n  std::ostringstream oss_;\n};\n} // namespace tensorpipe\n\n#define TP_LOG_DEBUG() \\\n  ::tensorpipe::LogEntry('V').getStream() << ' ' << TP_LOG_LOC << \"] \"\n#define TP_LOG_INFO() \\\n  ::tensorpipe::LogEntry('I').getStream() << ' ' << TP_LOG_LOC << \"] \"\n#define TP_LOG_WARNING() \\\n  ::tensorpipe::LogEntry('W').getStream() << ' ' << TP_LOG_LOC << \"] \"\n#define TP_LOG_ERROR() \\\n  ::tensorpipe::LogEntry('E').getStream() << ' ' << TP_LOG_LOC << \"] \"\n\n#define TP_LOG_DEBUG_IF(cond) \\\n  if (unlikely(cond))         \\\n  TP_LOG_DEBUG()\n#define TP_LOG_INFO_IF(cond) \\\n  if (unlikely(cond))        \\\n  TP_LOG_INFO()\n#define TP_LOG_WARNING_IF(cond) \\\n  if (unlikely(cond))           \\\n  TP_LOG_WARNING()\n#define TP_LOG_ERROR_IF(cond) \\\n  if (unlikely(cond))         \\\n  TP_LOG_ERROR()\n\n#define __TP_EXPAND_OPD(opd) TP_STRINGIFY(opd) << \"(\" << (opd) << \")\"\n\n//\n// Debug checks.\n// Note that non-debug checks are not provided because developers\n// must handle all errors explicitly.\n//\n\n#define __TP_DCHECK(a)  \\\n  if (unlikely(!((a)))) \\\n  TP_THROW_ASSERT() << \"Expected true for \" << __TP_EXPAND_OPD(a)\n\n#define __TP_DCHECK_CMP(a, b, op)                        \\\n  if (unlikely(!((a)op(b))))                             \\\n  TP_THROW_ASSERT() << \"Expected \" << __TP_EXPAND_OPD(a) \\\n                    << \" \" TP_STRINGIFY(op) << \" \" << __TP_EXPAND_OPD(b)\n\n// Expand macro only in debug mode.\n#ifdef NDEBUG\n\n#define _TP_DLOG() \\\n  while (false)    \\\n  TP_LOG_DEBUG()\n\n#define _TP_DCHECK(a) \\\n  while (false)       \\\n  __TP_DCHECK(a)\n\n#define _TP_DCHECK_CMP(a, b, op) \\\n  while (false)      
            \\\n  __TP_DCHECK_CMP(a, b, op)\n\n#else\n\n#define _TP_DLOG() TP_LOG_DEBUG()\n\n#define _TP_DCHECK(a) __TP_DCHECK(a)\n\n#define _TP_DCHECK_CMP(a, b, op) __TP_DCHECK_CMP(a, b, op)\n\n#endif\n\n// Public API for debug logging.\n#define TP_DLOG() _TP_DLOG()\n\n// Public API for debug checks.\n#define TP_DCHECK(a) _TP_DCHECK(a)\n#define TP_DCHECK_EQ(a, b) _TP_DCHECK_CMP(a, b, ==)\n#define TP_DCHECK_NE(a, b) _TP_DCHECK_CMP(a, b, !=)\n#define TP_DCHECK_LT(a, b) _TP_DCHECK_CMP(a, b, <)\n#define TP_DCHECK_LE(a, b) _TP_DCHECK_CMP(a, b, <=)\n#define TP_DCHECK_GT(a, b) _TP_DCHECK_CMP(a, b, >)\n#define TP_DCHECK_GE(a, b) _TP_DCHECK_CMP(a, b, >=)\n\n//\n// Verbose logging.\n// Some logging is helpful to diagnose tricky production issues but is too\n// verbose to keep on all the time. It also should not be controlled by the\n// debug flags, as we want to allow it to be enabled in production builds.\n//\n\n// The level of each TP_VLOG call should reflect where the object issuing it is\n// located in the stack , and whether it's a call that involves handling\n// requests from objects higher up, or issuing requests to objects lower down.\n// This brings us to the following classification:\n// - level 1 is for requests that core classes receive from the user\n// - level 2 is for generic core classes stuff\n// - level 3 is for requests that core classes issue to channels/transports\n// - level 4 is for requests that channels receive from core classes\n// - level 5 is for generic channels stuff\n// - level 6 is for requests that channels issue to transports\n// - level 7 is for requests that transports receive from core classes/channels\n// - level 8 is for generic transports stuff\n// - level 9 is for how transports deal with system resources\n\nnamespace tensorpipe {\ninline unsigned long getVerbosityLevelInternal() {\n  char* levelStr = std::getenv(\"TP_VERBOSE_LOGGING\");\n  if (levelStr == nullptr) {\n    return 0;\n  }\n  return std::strtoul(levelStr, 
/*str_end=*/nullptr, /*base=*/10);\n}\n\ninline unsigned long getVerbosityLevel() {\n  static unsigned long level = getVerbosityLevelInternal();\n  return level;\n}\n} // namespace tensorpipe\n\n#define TP_VLOG(level) TP_LOG_DEBUG_IF(level <= getVerbosityLevel())\n\n//\n// Argument checks\n//\n#define TP_ARG_CHECK(a) \\\n  if (unlikely(!((a)))) \\\n  TP_THROW_EINVAL() << \"Expected argument to be true: \" << __TP_EXPAND_OPD(a)\n\n#define _TP_ARG_CMP(a, b, op)                                     \\\n  if (unlikely(!((a)op(b))))                                      \\\n  TP_THROW_EINVAL() << \"Expected argument \" << __TP_EXPAND_OPD(a) \\\n                    << \" \" TP_STRINGIFY(op) << \" \" << __TP_EXPAND_OPD(b)\n\n#define TP_ARG_CHECK_EQ(a, b) _TP_ARG_CMP(a, b, ==)\n#define TP_ARG_CHECK_NE(a, b) _TP_ARG_CMP(a, b, !=)\n#define TP_ARG_CHECK_LT(a, b) _TP_ARG_CMP(a, b, <)\n#define TP_ARG_CHECK_LE(a, b) _TP_ARG_CMP(a, b, <=)\n#define TP_ARG_CHECK_GT(a, b) _TP_ARG_CMP(a, b, >)\n#define TP_ARG_CHECK_GE(a, b) _TP_ARG_CMP(a, b, >=)\n\n// Define DEXCEPT macro that is noexcept only in debug mode.\n#ifdef NDEBUG\n#define DEXCEPT noexcept(true)\n#else\n#define DEXCEPT noexcept(false)\n#endif\n\n#define TP_LOG_EXCEPTION(e)                         \\\n  TP_LOG_ERROR() << \"Exception in \" << __FUNCTION__ \\\n                 << \" . Message: \" << e.what()\n"
  },
  {
    "path": "tensorpipe/common/device.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sstream>\n#include <stdexcept>\n#include <string>\n\nnamespace tensorpipe {\n\nconst std::string kCpuDeviceType{\"cpu\"};\nconst std::string kCudaDeviceType{\"cuda\"};\n\nstruct Device {\n  std::string type;\n  int index;\n\n  // This pointless constructor is needed to work around a bug in GCC 5.5 (and\n  // possibly other versions). It appears to be needed in the nop types that\n  // are used inside nop::Optional.\n  Device() {}\n\n  Device(std::string type, int index) : type(std::move(type)), index(index) {}\n\n  std::string toString() const {\n    std::stringstream ss;\n    ss << type << \":\" << index;\n    return ss.str();\n  }\n\n  bool operator==(const Device& other) const {\n    return type == other.type && index == other.index;\n  }\n};\n\n} // namespace tensorpipe\n\nnamespace std {\n\ntemplate <>\nstruct hash<::tensorpipe::Device> {\n  size_t operator()(const ::tensorpipe::Device& device) const noexcept {\n    return std::hash<std::string>{}(device.toString());\n  }\n};\n\ntemplate <>\nstruct hash<std::pair<::tensorpipe::Device, ::tensorpipe::Device>> {\n  size_t operator()(const std::pair<::tensorpipe::Device, ::tensorpipe::Device>&\n                        p) const noexcept {\n    size_t h1 = std::hash<::tensorpipe::Device>{}(p.first);\n    size_t h2 = std::hash<::tensorpipe::Device>{}(p.second);\n    // Shifting one hash to avoid collisions between (a, b) and (b, a).\n    return h1 ^ (h2 << 1);\n  }\n};\n\n} // namespace std\n"
  },
  {
    "path": "tensorpipe/common/dl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <dlfcn.h>\n#include <link.h>\n\n#include <array>\n#include <climits>\n#include <cstdlib>\n#include <memory>\n#include <string>\n#include <tuple>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/error_macros.h>\n\nnamespace tensorpipe {\n\nclass DlError final : public BaseError {\n public:\n  explicit DlError(char* error) : error_(error) {}\n\n  std::string what() const override {\n    return error_;\n  }\n\n private:\n  std::string error_;\n};\n\nclass DynamicLibraryHandle {\n public:\n  DynamicLibraryHandle() = default;\n\n  static std::tuple<Error, DynamicLibraryHandle> create(\n      const char* filename,\n      int flags) {\n    void* ptr = ::dlopen(filename, flags);\n    if (ptr == nullptr) {\n      return std::make_tuple(\n          TP_CREATE_ERROR(DlError, ::dlerror()), DynamicLibraryHandle());\n    }\n    return std::make_tuple(Error::kSuccess, DynamicLibraryHandle(ptr));\n  }\n\n  bool hasValue() const {\n    return ptr_ != nullptr;\n  }\n\n  std::tuple<Error, void*> loadSymbol(const char* name) {\n    // Since dlsym doesn't return a specific value to signal errors (because\n    // NULL is a valid return value), we need to detect errors by calling\n    // dlerror and checking whether it returns a string or not (i.e., NULL). 
But\n    // in order to do so, we must first reset the error, in case one was already\n    // recorded.\n    ::dlerror();\n    void* ptr = ::dlsym(ptr_.get(), name);\n    char* err = ::dlerror();\n    if (err != nullptr) {\n      return std::make_tuple(TP_CREATE_ERROR(DlError, err), nullptr);\n    }\n    return std::make_tuple(Error::kSuccess, ptr);\n  }\n\n  std::tuple<Error, std::string> getFilename() {\n    struct link_map* linkMap;\n    int rv = ::dlinfo(ptr_.get(), RTLD_DI_LINKMAP, &linkMap);\n    if (rv < 0) {\n      return std::make_tuple(\n          TP_CREATE_ERROR(DlError, ::dlerror()), std::string());\n    }\n    std::array<char, PATH_MAX> path;\n    char* resolvedPath = ::realpath(linkMap->l_name, path.data());\n    if (resolvedPath == nullptr) {\n      return std::make_tuple(\n          TP_CREATE_ERROR(SystemError, \"realpath\", errno), std::string());\n    }\n    TP_DCHECK(resolvedPath == path.data());\n    return std::make_tuple(Error::kSuccess, std::string(path.data()));\n  }\n\n private:\n  struct Deleter {\n    void operator()(void* ptr) {\n      int res = ::dlclose(ptr);\n      TP_THROW_ASSERT_IF(res != 0) << \"dlclose() failed: \" << ::dlerror();\n    }\n  };\n\n  DynamicLibraryHandle(void* ptr) : ptr_(ptr, Deleter{}) {}\n\n  std::unique_ptr<void, Deleter> ptr_;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/epoll_loop.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/epoll_loop.h>\n\n#include <sys/eventfd.h>\n\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\n\nEpollLoop::EpollLoop(DeferredExecutor& deferredExecutor)\n    : deferredExecutor_(deferredExecutor) {\n  {\n    auto rv = ::epoll_create(1);\n    TP_THROW_SYSTEM_IF(rv == -1, errno);\n    epollFd_ = Fd(rv);\n  }\n  {\n    auto rv = ::eventfd(0, EFD_NONBLOCK);\n    TP_THROW_SYSTEM_IF(rv == -1, errno);\n    eventFd_ = Fd(rv);\n  }\n\n  // Register the eventfd with epoll.\n  {\n    struct epoll_event ev;\n    ev.events = EPOLLIN;\n    ev.data.u64 = 0;\n    auto rv = ::epoll_ctl(epollFd_.fd(), EPOLL_CTL_ADD, eventFd_.fd(), &ev);\n    TP_THROW_SYSTEM_IF(rv == -1, errno);\n  }\n\n  // Start epoll(2) thread.\n  thread_ = std::thread(&EpollLoop::loop, this);\n}\n\nvoid EpollLoop::close() {\n  if (!closed_.exchange(true)) {\n    wakeup();\n  }\n}\n\nvoid EpollLoop::join() {\n  close();\n\n  if (!joined_.exchange(true)) {\n    thread_.join();\n  }\n}\n\nEpollLoop::~EpollLoop() {\n  join();\n\n  // Unregister the eventfd with epoll.\n  {\n    auto rv = ::epoll_ctl(epollFd_.fd(), EPOLL_CTL_DEL, eventFd_.fd(), nullptr);\n    TP_THROW_SYSTEM_IF(rv == -1, errno);\n  }\n}\n\nvoid EpollLoop::registerDescriptor(\n    int fd,\n    int events,\n    std::shared_ptr<EventHandler> h) {\n  TP_DCHECK(deferredExecutor_.inLoop());\n\n  std::lock_guard<std::mutex> lock(handlersMutex_);\n\n  uint64_t record = nextRecord_++;\n\n  struct epoll_event ev;\n  ev.events = events;\n  ev.data.u64 = record;\n\n  auto fdIter = fdToRecord_.find(fd);\n  if (fdIter == fdToRecord_.end()) {\n    fdToRecord_.emplace(fd, record);\n    recordToHandler_.emplace(record, h);\n\n    auto rv = ::epoll_ctl(epollFd_.fd(), EPOLL_CTL_ADD, fd, 
&ev);\n    TP_THROW_SYSTEM_IF(rv == -1, errno);\n  } else {\n    uint64_t oldRecord = fdIter->second;\n    fdIter->second = record;\n    recordToHandler_.erase(oldRecord);\n    recordToHandler_.emplace(record, h);\n\n    auto rv = ::epoll_ctl(epollFd_.fd(), EPOLL_CTL_MOD, fd, &ev);\n    TP_THROW_SYSTEM_IF(rv == -1, errno);\n  }\n}\n\nvoid EpollLoop::unregisterDescriptor(int fd) {\n  TP_DCHECK(deferredExecutor_.inLoop());\n\n  std::lock_guard<std::mutex> lock(handlersMutex_);\n\n  auto fdIter = fdToRecord_.find(fd);\n  TP_DCHECK(fdIter != fdToRecord_.end());\n  uint64_t oldRecord = fdIter->second;\n  fdToRecord_.erase(fdIter);\n  recordToHandler_.erase(oldRecord);\n\n  auto rv = ::epoll_ctl(epollFd_.fd(), EPOLL_CTL_DEL, fd, nullptr);\n  TP_THROW_SYSTEM_IF(rv == -1, errno);\n\n  // Maybe we're done and the event loop is waiting for the last handlers to\n  // be unregistered before terminating, so just in case we wake it up.\n  if (fdToRecord_.empty()) {\n    wakeup();\n  }\n}\n\nvoid EpollLoop::wakeup() {\n  // Perform a write to eventfd to wake up epoll_wait(2).\n  eventFd_.writeOrThrow<uint64_t>(1);\n}\n\nbool EpollLoop::hasRegisteredHandlers() {\n  std::lock_guard<std::mutex> lock(handlersMutex_);\n  TP_DCHECK_EQ(fdToRecord_.size(), recordToHandler_.size());\n  return !fdToRecord_.empty();\n}\n\nvoid EpollLoop::loop() {\n  setThreadName(\"TP_IBV_loop\");\n\n  // Stop when another thread has asked the loop the close and when all\n  // handlers have been unregistered except for the wakeup eventfd one.\n  while (!closed_ || hasRegisteredHandlers()) {\n    // Use fixed epoll_event capacity for every call.\n    std::vector<struct epoll_event> epollEvents(kCapacity);\n\n    // Block waiting for something to happen...\n    auto nfds =\n        ::epoll_wait(epollFd_.fd(), epollEvents.data(), epollEvents.size(), -1);\n    if (nfds == -1) {\n      if (errno == EINTR) {\n        continue;\n      }\n      TP_THROW_SYSTEM(errno);\n    }\n\n    // Always immediately read from 
the eventfd so that it is no longer readable\n    // on the next call to epoll_wait(2). As it's opened in non-blocking mode,\n    // reading from it if its value is zero just return EAGAIN. Reset it before\n    // invoking any of the callbacks, so that if they perform a wakeup they will\n    // wake up the next iteration of epoll_wait(2).\n    {\n      uint64_t val;\n      auto rv = eventFd_.read(reinterpret_cast<void*>(&val), sizeof(val));\n      TP_DCHECK(\n          (rv == -1 && errno == EAGAIN) || (rv == sizeof(val) && val > 0));\n    }\n\n    // Resize based on actual number of events.\n    epollEvents.resize(nfds);\n\n    // Defer handling to reactor and wait for it to process these events.\n    deferredExecutor_.runInLoop(\n        [this, epollEvents{std::move(epollEvents)}]() mutable {\n          handleEpollEventsFromLoop(std::move(epollEvents));\n        });\n  }\n}\n\nvoid EpollLoop::handleEpollEventsFromLoop(\n    std::vector<struct epoll_event> epollEvents) {\n  TP_DCHECK(deferredExecutor_.inLoop());\n\n  // Process events returned by epoll_wait(2).\n  for (const auto& event : epollEvents) {\n    const uint64_t record = event.data.u64;\n\n    // Make a copy so that if the handler unregisters itself as it runs it will\n    // still be kept alive by our copy of the shared_ptr.\n    std::shared_ptr<EventHandler> handler;\n    {\n      std::unique_lock<std::mutex> handlersLock(handlersMutex_);\n      const auto recordIter = recordToHandler_.find(record);\n      if (recordIter == recordToHandler_.end()) {\n        continue;\n      }\n      handler = recordIter->second;\n    }\n\n    handler->handleEventsFromLoop(event.events);\n  }\n}\n\nstd::string EpollLoop::formatEpollEvents(uint32_t events) {\n  std::string res;\n  if (events & EPOLLIN) {\n    res = res.empty() ? \"IN\" : res + \" | IN\";\n    events &= ~EPOLLIN;\n  }\n  if (events & EPOLLOUT) {\n    res = res.empty() ? 
\"OUT\" : res + \" | OUT\";\n    events &= ~EPOLLOUT;\n  }\n  if (events & EPOLLERR) {\n    res = res.empty() ? \"ERR\" : res + \" | ERR\";\n    events &= ~EPOLLERR;\n  }\n  if (events & EPOLLHUP) {\n    res = res.empty() ? \"HUP\" : res + \" | HUP\";\n    events &= ~EPOLLHUP;\n  }\n  if (events > 0) {\n    std::string eventsStr = std::to_string(events);\n    res = res.empty() ? eventsStr : res + \" | \" + eventsStr;\n  }\n  return res;\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/epoll_loop.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <memory>\n#include <mutex>\n#include <string>\n#include <thread>\n#include <unordered_map>\n#include <vector>\n\n#include <sys/epoll.h>\n\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/fd.h>\n\nnamespace tensorpipe {\n\nclass EpollLoop final {\n public:\n  // Abstract base class called by the epoll(2) event loop.\n  //\n  // Dispatch to multiple types is needed because we must deal with a\n  // few listening sockets and an eventfd(2) per connection.\n  //\n  class EventHandler {\n   public:\n    virtual ~EventHandler() = default;\n\n    virtual void handleEventsFromLoop(int events) = 0;\n  };\n\n  explicit EpollLoop(DeferredExecutor& deferredExecutor);\n\n  // Register file descriptor with event loop.\n  //\n  // Trigger the handler if any of the epoll events in the `events`\n  // mask occurs. If an event is triggered, the loop first acquires a\n  // copy of the shared_ptr to the handler before calling into its\n  // handler function. This ensures that the handler is alive for the\n  // duration of this function.\n  //\n  void registerDescriptor(int fd, int events, std::shared_ptr<EventHandler> h);\n\n  // Unregister file descriptor from event loop.\n  //\n  // This resets the shared_ptr to the event handler that was registered\n  // in `registerDescriptor`. Upon returning, the handler can no\n  // longer be called, even if there were pending events for the file\n  // descriptor. 
Only if the loop had acquired a shared_ptr to the\n  // handler prior to this function being called, can the handler\n  // function still be called.\n  //\n  void unregisterDescriptor(int fd);\n\n  void close();\n\n  // Tell loop to terminate when no more handlers remain.\n  void join();\n\n  ~EpollLoop();\n\n  static std::string formatEpollEvents(uint32_t events);\n\n private:\n  static constexpr auto kCapacity = 64;\n\n  // The reactor is used to process events for this loop.\n  DeferredExecutor& deferredExecutor_;\n\n  // Wake up the event loop.\n  void wakeup();\n\n  // Main loop function.\n  void loop();\n\n  // Check whether some handlers are currently registered.\n  bool hasRegisteredHandlers();\n\n  Fd epollFd_;\n  Fd eventFd_;\n  std::atomic<bool> closed_{false};\n  std::atomic<bool> joined_{false};\n  std::thread thread_;\n\n  // Interaction with epoll(7).\n  //\n  // A dedicated thread runs epoll_wait(2) in a loop and, every time it returns,\n  // it defers a function to the reactor which is responsible for processing the\n  // epoll events and executing the handlers, and then notify the epoll thread\n  // that it is done, for it to start another iteration. This back-and-forth\n  // between these threads is done to ensure that all epoll handlers are run\n  // from the reactor thread, just like everything else. Doing so makes it\n  // easier to reason about how certain events are sequenced. For example, if\n  // another processes first makes a write to a connection and then closes the\n  // accompanying Unix domain socket, we know for a fact that the reactor will\n  // first react to the write, and then react to the epoll event caused by\n  // closing the socket. If we didn't force serialization onto the reactor, we\n  // would not have this guarantee.\n  //\n  // It's safe to call epoll_ctl from one thread while another thread is blocked\n  // on an epoll_wait call. 
This means that the kernel internally serializes the\n  // operations on a single epoll fd. However, we have no way to control whether\n  // a modification of the set of file descriptors monitored by epoll occurred\n  // just before or just after the return from the epoll_wait. This means that\n  // when we start processing the result of epoll_wait we can't know what set of\n  // file descriptors it operated on. This becomes a problem if, for example, in\n  // between the moment epoll_wait returns and the moment we process the results\n  // a file descriptor is unregistered and closed and another one with the same\n  // value is opened and registered: we'd end up calling the handler of the new\n  // fd for the events of the old one (which probably include errors).\n  //\n  // However, epoll offers a way to address this: epoll_wait returns, for each\n  // event, the piece of extra data that was provided by the *last* call on\n  // epoll_ctl for that fd. This allows us to detect whether epoll_wait had\n  // taken into account an update to the set of fds or not. We do so by giving\n  // each update a unique identifier, called \"record\". Each update to a fd will\n  // associate a new record to it. The handlers are associated to records (and\n  // not to fds), and for each fd we know which handler is the one currently\n  // installed. 
This way when processing an event we can detect whether the\n  // record for that event is still valid or whether it is stale, in which case\n  // we disregard the event, and wait for it to fire again at the next epoll\n  // iteration, with the up-to-date handler.\n  std::unordered_map<int, uint64_t> fdToRecord_;\n  std::unordered_map<uint64_t, std::shared_ptr<EventHandler>> recordToHandler_;\n  uint64_t nextRecord_{1}; // Reserve record 0 for the eventfd\n  std::mutex handlersMutex_;\n\n  // Deferred to the reactor to handle the events received by epoll_wait(2).\n  void handleEpollEventsFromLoop(std::vector<struct epoll_event> epollEvents);\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/error.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/error.h>\n\n#include <cstring>\n#include <sstream>\n\n#include <tensorpipe/common/defs.h>\n\nnamespace tensorpipe {\n\nconst Error Error::kSuccess = Error();\n\nstd::string Error::what() const {\n  TP_DCHECK(error_);\n  std::ostringstream ss;\n  ss << error_->what() << \" (this error originated at \" << file_ << \":\" << line_\n     << \")\";\n  return ss.str();\n}\n\nstd::string SystemError::what() const {\n  std::ostringstream ss;\n  ss << syscall_ << \": \" << strerror(error_);\n  return ss.str();\n}\n\nint SystemError::errorCode() const {\n  return error_;\n}\n\nstd::string ShortReadError::what() const {\n  std::ostringstream ss;\n  ss << \"short read: got \" << actual_ << \" bytes while expecting to read \"\n     << expected_ << \" bytes\";\n  return ss.str();\n}\n\nstd::string ShortWriteError::what() const {\n  std::ostringstream ss;\n  ss << \"short write: wrote \" << actual_ << \" bytes while expecting to write \"\n     << expected_ << \" bytes\";\n  return ss.str();\n}\n\nstd::string EOFError::what() const {\n  return \"eof\";\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/error.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n\nnamespace tensorpipe {\n\n// Base class for actual errors.\nclass BaseError {\n public:\n  virtual ~BaseError() = default;\n\n  // Returns an explanatory string.\n  // Like `std::exception` but returns a `std::string`.\n  virtual std::string what() const = 0;\n};\n\n// Wrapper class for errors.\n//\n// Background: we wish to not use exceptions yet need an error\n// representation that can propagate across function and thread\n// boundaries. This representation must be copyable (so we can store\n// and return it at a later point in time) and retain downstream type\n// information. This implies a heap allocation because it's the\n// easiest way to deal with variable size objects (barring a union of\n// all downstream error classes and a lot of custom code). Instead of\n// passing a shared_ptr around directly, we use this wrapper class to\n// keep implementation details hidden from calling code.\n//\nclass Error final {\n public:\n  // Constant instance that indicates success.\n  static const Error kSuccess;\n\n  // Default constructor for error that is not an error.\n  Error() {}\n\n  Error(std::shared_ptr<BaseError> error, std::string file, int line)\n      : error_(std::move(error)), file_(std::move(file)), line_(line) {}\n\n  ~Error() = default;\n\n  // Converting to boolean means checking if there is an error. 
This\n  // means we don't need to use an `std::optional` and allows for a\n  // snippet like the following:\n  //\n  //   if (error) {\n  //     // Deal with it.\n  //   }\n  //\n  operator bool() const {\n    return static_cast<bool>(error_);\n  }\n\n  template <typename T>\n  std::shared_ptr<T> castToType() const {\n    return std::dynamic_pointer_cast<T>(error_);\n  }\n\n  template <typename T>\n  bool isOfType() const {\n    return castToType<T>() != nullptr;\n  }\n\n  // Like `std::exception` but returns a `std::string`.\n  std::string what() const;\n\n private:\n  std::shared_ptr<BaseError> error_;\n  std::string file_;\n  int line_;\n};\n\nclass SystemError final : public BaseError {\n public:\n  explicit SystemError(const char* syscall, int error)\n      : syscall_(syscall), error_(error) {}\n\n  std::string what() const override;\n\n  int errorCode() const;\n\n private:\n  const char* syscall_;\n  const int error_;\n};\n\nclass ShortReadError final : public BaseError {\n public:\n  ShortReadError(ssize_t expected, ssize_t actual)\n      : expected_(expected), actual_(actual) {}\n\n  std::string what() const override;\n\n private:\n  const ssize_t expected_;\n  const ssize_t actual_;\n};\n\nclass ShortWriteError final : public BaseError {\n public:\n  ShortWriteError(ssize_t expected, ssize_t actual)\n      : expected_(expected), actual_(actual) {}\n\n  std::string what() const override;\n\n private:\n  const ssize_t expected_;\n  const ssize_t actual_;\n};\n\nclass EOFError final : public BaseError {\n public:\n  EOFError() {}\n\n  std::string what() const override;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/error_macros.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n\n#define TP_CREATE_ERROR(typ, ...)         \\\n  (Error(                                 \\\n      std::make_shared<typ>(__VA_ARGS__), \\\n      TP_TRIM_FILENAME(__FILE__),         \\\n      __LINE__))\n"
  },
  {
    "path": "tensorpipe/common/fd.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/fd.h>\n\n#include <unistd.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/error_macros.h>\n\nnamespace tensorpipe {\n\nssize_t Fd::read(void* buf, size_t count) {\n  ssize_t rv = -1;\n  for (;;) {\n    rv = ::read(fd_, buf, count);\n    if (rv == -1 && errno == EINTR) {\n      continue;\n    }\n    break;\n  }\n  return rv;\n}\n\n// Proxy to write(2) with EINTR retry.\nssize_t Fd::write(const void* buf, size_t count) {\n  ssize_t rv = -1;\n  for (;;) {\n    rv = ::write(fd_, buf, count);\n    if (rv == -1 && errno == EINTR) {\n      continue;\n    }\n    break;\n  }\n  return rv;\n}\n\n// Call read and throw if it doesn't complete.\nError Fd::readFull(void* buf, size_t count) {\n  auto rv = read(buf, count);\n  if (rv == -1) {\n    return TP_CREATE_ERROR(SystemError, \"read\", errno);\n  }\n  if (rv != count) {\n    return TP_CREATE_ERROR(ShortReadError, count, rv);\n  }\n  return Error::kSuccess;\n}\n\n// Call write and throw if it doesn't complete.\nError Fd::writeFull(const void* buf, size_t count) {\n  auto rv = write(buf, count);\n  if (rv == -1) {\n    return TP_CREATE_ERROR(SystemError, \"write\", errno);\n  }\n  if (rv != count) {\n    return TP_CREATE_ERROR(ShortWriteError, count, rv);\n  }\n  return Error::kSuccess;\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/fd.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <stdexcept>\n#include <type_traits>\n\n#include <unistd.h>\n\n#include <tensorpipe/common/error.h>\n\nnamespace tensorpipe {\n\nclass Fd {\n public:\n  Fd() = default;\n\n  explicit Fd(int fd) : fd_(fd) {}\n\n  virtual ~Fd() {\n    reset();\n  }\n\n  // Disable copy constructor.\n  Fd(const Fd&) = delete;\n\n  // Disable copy assignment.\n  Fd& operator=(const Fd&) = delete;\n\n  // Custom move constructor.\n  Fd(Fd&& other) noexcept {\n    std::swap(fd_, other.fd_);\n  }\n\n  // Custom move assignment.\n  Fd& operator=(Fd&& other) noexcept {\n    std::swap(fd_, other.fd_);\n    return *this;\n  }\n\n  // Return underlying file descriptor.\n  int fd() const {\n    return fd_;\n  }\n\n  bool hasValue() const {\n    return fd_ >= 0;\n  }\n\n  void reset() {\n    if (hasValue()) {\n      ::close(fd_);\n      fd_ = -1;\n    }\n  }\n\n  // Proxy to read(2) with EINTR retry.\n  ssize_t read(void* buf, size_t count);\n\n  // Proxy to write(2) with EINTR retry.\n  ssize_t write(const void* buf, size_t count);\n\n  // Call read and return error if it doesn't exactly read `count` bytes.\n  Error readFull(void* buf, size_t count);\n\n  // Call write and return error if it doesn't exactly write `count` bytes.\n  Error writeFull(const void* buf, size_t count);\n\n  // Call `readFull` with trivially copyable type. Throws on errors.\n  template <typename T>\n  T readOrThrow() {\n    T tmp;\n    static_assert(std::is_trivially_copyable<T>::value, \"!\");\n    auto err = readFull(&tmp, sizeof(T));\n    if (err) {\n      throw std::runtime_error(err.what());\n    }\n    return tmp;\n  }\n\n  // Call `writeFull` with trivially copyable type. 
Throws on errors.\n  template <typename T>\n  void writeOrThrow(const T& t) {\n    static_assert(std::is_trivially_copyable<T>::value, \"!\");\n    auto err = writeFull(&t, sizeof(T));\n    if (err) {\n      throw std::runtime_error(err.what());\n    }\n  }\n\n  // Call `readFull` with trivially copyable type.\n  template <typename T>\n  Error read(T* t) {\n    static_assert(std::is_trivially_copyable<T>::value, \"!\");\n    return readFull(t, sizeof(T));\n  }\n\n  // Call `writeFull` with trivially copyable type.\n  template <typename T>\n  Error write(const T& t) {\n    static_assert(std::is_trivially_copyable<T>::value, \"!\");\n    return writeFull(&t, sizeof(T));\n  }\n\n protected:\n  int fd_{-1};\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/ibv.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/ibv.h>\n\n#include <cstdlib>\n#include <cstring>\n\nnamespace tensorpipe {\n\nstd::string ibvWorkCompletionOpcodeToStr(IbvLib::wc_opcode opcode) {\n  switch (opcode) {\n    case IbvLib::WC_SEND:\n      return \"SEND\";\n    case IbvLib::WC_RDMA_WRITE:\n      return \"RDMA_WRITE\";\n    case IbvLib::WC_RDMA_READ:\n      return \"RDMA_READ\";\n    case IbvLib::WC_COMP_SWAP:\n      return \"COMP_SWAP\";\n    case IbvLib::WC_FETCH_ADD:\n      return \"FETCH_ADD\";\n    case IbvLib::WC_BIND_MW:\n      return \"BIND_MW\";\n    case IbvLib::WC_RECV:\n      return \"RECV\";\n    case IbvLib::WC_RECV_RDMA_WITH_IMM:\n      return \"RECV_RDMA_WITH_IMM\";\n    default:\n      return \"UNKNOWN (\" + std::to_string(opcode) + \")\";\n  }\n}\n\nstruct IbvAddress makeIbvAddress(\n    const IbvLib& ibvLib,\n    const IbvContext& context,\n    uint8_t portNum,\n    uint8_t globalIdentifierIndex) {\n  struct IbvAddress addr;\n  std::memset(&addr, 0, sizeof(addr));\n\n  addr.portNum = portNum;\n  addr.globalIdentifierIndex = globalIdentifierIndex;\n\n  IbvLib::port_attr portAttr;\n  std::memset(&portAttr, 0, sizeof(portAttr));\n  TP_CHECK_IBV_INT(ibvLib.query_port(context.get(), portNum, &portAttr));\n  addr.localIdentifier = portAttr.lid;\n  addr.maximumTransmissionUnit = portAttr.active_mtu;\n  addr.maximumMessageSize = portAttr.max_msg_sz;\n\n  TP_CHECK_IBV_INT(ibvLib.query_gid(\n      context.get(), portNum, globalIdentifierIndex, &addr.globalIdentifier));\n\n  return addr;\n}\n\nstruct IbvSetupInformation makeIbvSetupInformation(\n    const IbvAddress& addr,\n    const IbvQueuePair& qp) {\n  struct IbvSetupInformation info;\n  std::memset(&info, 0, sizeof(info));\n\n  info.localIdentifier = 
addr.localIdentifier;\n  info.globalIdentifier = addr.globalIdentifier;\n  info.queuePairNumber = qp->qp_num;\n  info.maximumTransmissionUnit = addr.maximumTransmissionUnit;\n  info.maximumMessageSize = addr.maximumMessageSize;\n\n  return info;\n}\n\nvoid transitionIbvQueuePairToInit(\n    const IbvLib& ibvLib,\n    IbvQueuePair& qp,\n    const IbvAddress& selfAddr) {\n  IbvLib::qp_attr attr;\n  std::memset(&attr, 0, sizeof(attr));\n  int attrMask = 0;\n\n  attrMask |= IbvLib::QP_STATE;\n  attr.qp_state = IbvLib::QPS_INIT;\n\n  // Hardcode the use of the first entry of the partition key table, as it will\n  // always be valid.\n  // FIXME: Make this configurable similarly to the port number.\n  attrMask |= IbvLib::QP_PKEY_INDEX;\n  attr.pkey_index = 0;\n\n  attrMask |= IbvLib::QP_PORT;\n  attr.port_num = selfAddr.portNum;\n\n  attrMask |= IbvLib::QP_ACCESS_FLAGS;\n  attr.qp_access_flags =\n      IbvLib::ACCESS_LOCAL_WRITE | IbvLib::ACCESS_REMOTE_WRITE;\n\n  TP_CHECK_IBV_INT(ibvLib.modify_qp(qp.get(), &attr, attrMask));\n}\n\nvoid transitionIbvQueuePairToReadyToReceive(\n    const IbvLib& ibvLib,\n    IbvQueuePair& qp,\n    const IbvAddress& selfAddr,\n    const IbvSetupInformation& destinationInfo) {\n  IbvLib::qp_attr attr;\n  std::memset(&attr, 0, sizeof(attr));\n  int attrMask = 0;\n\n  attrMask |= IbvLib::QP_STATE;\n  attr.qp_state = IbvLib::QPS_RTR;\n\n  // Global routing is only set up as far as needed to support RoCE.\n  attrMask |= IbvLib::QP_AV;\n  if (destinationInfo.localIdentifier != 0) {\n    attr.ah_attr.is_global = 0;\n    attr.ah_attr.dlid = destinationInfo.localIdentifier;\n  } else {\n    attr.ah_attr.is_global = 1;\n    attr.ah_attr.grh.dgid = destinationInfo.globalIdentifier;\n    attr.ah_attr.grh.sgid_index = selfAddr.globalIdentifierIndex;\n    attr.ah_attr.grh.hop_limit = 1;\n  }\n  attr.ah_attr.port_num = selfAddr.portNum;\n\n  attrMask |= IbvLib::QP_PATH_MTU;\n  attr.path_mtu = std::min(\n      selfAddr.maximumTransmissionUnit,\n      
destinationInfo.maximumTransmissionUnit);\n\n  attrMask |= IbvLib::QP_DEST_QPN;\n  attr.dest_qp_num = destinationInfo.queuePairNumber;\n\n  // The packet sequence numbers of the local send and of the remote receive\n  // queues (and vice versa) only need to match. Thus we set them all to zero.\n  attrMask |= IbvLib::QP_RQ_PSN;\n  attr.rq_psn = 0;\n\n  attrMask |= IbvLib::QP_MAX_DEST_RD_ATOMIC;\n  attr.max_dest_rd_atomic = 1;\n\n  attrMask |= IbvLib::QP_MIN_RNR_TIMER;\n  attr.min_rnr_timer = 20; // 10.24 milliseconds\n\n  TP_CHECK_IBV_INT(ibvLib.modify_qp(qp.get(), &attr, attrMask));\n}\n\nvoid transitionIbvQueuePairToReadyToSend(\n    const IbvLib& ibvLib,\n    IbvQueuePair& qp) {\n  IbvLib::qp_attr attr;\n  std::memset(&attr, 0, sizeof(attr));\n  int attrMask = 0;\n\n  attrMask |= IbvLib::QP_STATE;\n  attr.qp_state = IbvLib::QPS_RTS;\n\n  // The packet sequence numbers of the local send and of the remote receive\n  // queues (and vice versa) only need to match. Thus we set them all to zero.\n  attrMask |= IbvLib::QP_SQ_PSN;\n  attr.sq_psn = 0;\n\n  attrMask |= IbvLib::QP_TIMEOUT;\n  attr.timeout = 14; // 67.1 milliseconds\n\n  attrMask |= IbvLib::QP_RETRY_CNT;\n  attr.retry_cnt = 7;\n\n  attrMask |= IbvLib::QP_RNR_RETRY;\n  attr.rnr_retry = 7; // infinite\n\n  attrMask |= IbvLib::QP_MAX_QP_RD_ATOMIC;\n  attr.max_rd_atomic = 1;\n\n  TP_CHECK_IBV_INT(ibvLib.modify_qp(qp.get(), &attr, attrMask));\n}\n\nvoid transitionIbvQueuePairToError(const IbvLib& ibvLib, IbvQueuePair& qp) {\n  IbvLib::qp_attr attr;\n  std::memset(&attr, 0, sizeof(attr));\n  int attrMask = 0;\n\n  attrMask |= IbvLib::QP_STATE;\n  attr.qp_state = IbvLib::QPS_ERR;\n\n  TP_CHECK_IBV_INT(ibvLib.modify_qp(qp.get(), &attr, attrMask));\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/ibv.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/ibv_lib.h>\n\nnamespace tensorpipe {\n\n// Error checking macros\n\n#define TP_CHECK_IBV_PTR(op)                   \\\n  [&]() {                                      \\\n    auto ptr = op;                             \\\n    TP_THROW_SYSTEM_IF(ptr == nullptr, errno); \\\n    return ptr;                                \\\n  }()\n\n#define TP_CHECK_IBV_INT(op)           \\\n  {                                    \\\n    int rv = op;                       \\\n    TP_THROW_SYSTEM_IF(rv < 0, errno); \\\n  }\n\n#define TP_CHECK_IBV_VOID(op) op;\n\n// Logging helpers\n\nstd::string ibvWorkCompletionOpcodeToStr(IbvLib::wc_opcode opcode);\n\n// RAII wrappers\n\nclass IbvDeviceList {\n private:\n  IbvDeviceList(const IbvLib& ibvLib, IbvLib::device** ptr, int size)\n      : deviceList_(ptr, Deleter{&ibvLib}), size_(size) {}\n\n public:\n  IbvDeviceList() = default;\n\n  static std::tuple<Error, IbvDeviceList> create(const IbvLib& ibvLib) {\n    int size;\n    IbvLib::device** ptr = ibvLib.get_device_list(&size);\n    if (ptr == nullptr) {\n      // Earlier versions of libibverbs had a bug where errno would be set to\n      // *negative* ENOSYS when the module wasn't found. This got fixed in\n      // https://github.com/linux-rdma/rdma-core/commit/062bf1a72badaf6ad2d51ebe4c8c8bdccfc376e2\n      // However, to support those versions, we manually flip it in case.\n      return std::make_tuple(\n          TP_CREATE_ERROR(\n              SystemError,\n              \"ibv_get_device_list\",\n              errno == -ENOSYS ? 
ENOSYS : errno),\n          IbvDeviceList());\n    }\n    return std::make_tuple(Error::kSuccess, IbvDeviceList(ibvLib, ptr, size));\n  }\n\n  int size() {\n    return size_;\n  }\n\n  IbvLib::device& operator[](int i) {\n    return *deviceList_.get()[i];\n  }\n\n  void reset() {\n    deviceList_.reset();\n  }\n\n  // FIXME Can we support a \"range\" API (i.e., a begin() and end() method) so\n  // that this can be used in a for (auto& dev : deviceList) expression?\n\n private:\n  struct Deleter {\n    void operator()(IbvLib::device** ptr) {\n      TP_CHECK_IBV_VOID(ibvLib->free_device_list(ptr));\n    }\n\n    const IbvLib* ibvLib;\n  };\n\n  std::unique_ptr<IbvLib::device*, Deleter> deviceList_;\n  int size_;\n};\n\nstruct IbvContextDeleter {\n  void operator()(IbvLib::context* ptr) {\n    TP_CHECK_IBV_INT(ibvLib->close_device(ptr));\n  }\n\n  const IbvLib* ibvLib;\n};\n\nusing IbvContext = std::unique_ptr<IbvLib::context, IbvContextDeleter>;\n\ninline IbvContext createIbvContext(\n    const IbvLib& ibvLib,\n    IbvLib::device& device) {\n  return IbvContext(\n      TP_CHECK_IBV_PTR(ibvLib.open_device(&device)),\n      IbvContextDeleter{&ibvLib});\n}\n\nstruct IbvProtectionDomainDeleter {\n  void operator()(IbvLib::pd* ptr) {\n    TP_CHECK_IBV_INT(ibvLib->dealloc_pd(ptr));\n  }\n\n  const IbvLib* ibvLib;\n};\n\nusing IbvProtectionDomain =\n    std::unique_ptr<IbvLib::pd, IbvProtectionDomainDeleter>;\n\ninline IbvProtectionDomain createIbvProtectionDomain(\n    const IbvLib& ibvLib,\n    IbvContext& context) {\n  return IbvProtectionDomain(\n      TP_CHECK_IBV_PTR(ibvLib.alloc_pd(context.get())),\n      IbvProtectionDomainDeleter{&ibvLib});\n}\n\nstruct IbvCompletionQueueDeleter {\n  void operator()(IbvLib::cq* ptr) {\n    TP_CHECK_IBV_INT(ibvLib->destroy_cq(ptr));\n  }\n\n  const IbvLib* ibvLib;\n};\n\nusing IbvCompletionQueue =\n    std::unique_ptr<IbvLib::cq, IbvCompletionQueueDeleter>;\n\ninline IbvCompletionQueue createIbvCompletionQueue(\n    const IbvLib& 
ibvLib,\n    IbvContext& context,\n    int cqe,\n    void* cq_context,\n    IbvLib::comp_channel* channel,\n    int comp_vector) {\n  return IbvCompletionQueue(\n      TP_CHECK_IBV_PTR(ibvLib.create_cq(\n          context.get(), cqe, cq_context, channel, comp_vector)),\n      IbvCompletionQueueDeleter{&ibvLib});\n}\n\nstruct IbvSharedReceiveQueueDeleter {\n  void operator()(IbvLib::srq* ptr) {\n    TP_CHECK_IBV_INT(ibvLib->destroy_srq(ptr));\n  }\n\n  const IbvLib* ibvLib;\n};\n\nusing IbvSharedReceiveQueue =\n    std::unique_ptr<IbvLib::srq, IbvSharedReceiveQueueDeleter>;\n\ninline IbvSharedReceiveQueue createIbvSharedReceiveQueue(\n    const IbvLib& ibvLib,\n    IbvProtectionDomain& pd,\n    IbvLib::srq_init_attr& initAttr) {\n  return IbvSharedReceiveQueue(\n      TP_CHECK_IBV_PTR(ibvLib.create_srq(pd.get(), &initAttr)),\n      IbvSharedReceiveQueueDeleter{&ibvLib});\n}\n\nstruct IbvMemoryRegionDeleter {\n  void operator()(IbvLib::mr* ptr) {\n    TP_CHECK_IBV_INT(ibvLib->dereg_mr(ptr));\n  }\n\n  const IbvLib* ibvLib;\n};\n\nusing IbvMemoryRegion = std::unique_ptr<IbvLib::mr, IbvMemoryRegionDeleter>;\n\ninline IbvMemoryRegion createIbvMemoryRegion(\n    const IbvLib& ibvLib,\n    IbvProtectionDomain& pd,\n    void* addr,\n    size_t length,\n    int accessFlags) {\n  return IbvMemoryRegion(\n      TP_CHECK_IBV_PTR(ibvLib.reg_mr(pd.get(), addr, length, accessFlags)),\n      IbvMemoryRegionDeleter{&ibvLib});\n}\n\nstruct IbvQueuePairDeleter {\n  void operator()(IbvLib::qp* ptr) {\n    TP_CHECK_IBV_INT(ibvLib->destroy_qp(ptr));\n  }\n\n  const IbvLib* ibvLib;\n};\n\nusing IbvQueuePair = std::unique_ptr<IbvLib::qp, IbvQueuePairDeleter>;\n\ninline IbvQueuePair createIbvQueuePair(\n    const IbvLib& ibvLib,\n    IbvProtectionDomain& pd,\n    IbvLib::qp_init_attr& initAttr) {\n  return IbvQueuePair(\n      TP_CHECK_IBV_PTR(ibvLib.create_qp(pd.get(), &initAttr)),\n      IbvQueuePairDeleter{&ibvLib});\n}\n\n// Helpers\n\nstruct IbvAddress {\n  uint8_t portNum;\n  uint8_t 
globalIdentifierIndex;\n  // The already-resolved LID of the above device+port pair.\n  uint32_t localIdentifier;\n  // The already-resolved GID of the above device+port+index combination.\n  IbvLib::gid globalIdentifier;\n  IbvLib::mtu maximumTransmissionUnit;\n  uint32_t maximumMessageSize;\n};\n\nstruct IbvSetupInformation {\n  uint32_t localIdentifier;\n  IbvLib::gid globalIdentifier;\n  uint32_t queuePairNumber;\n  IbvLib::mtu maximumTransmissionUnit;\n  uint32_t maximumMessageSize;\n};\n\nstruct IbvAddress makeIbvAddress(\n    const IbvLib& ibvLib,\n    const IbvContext& context,\n    uint8_t portNum,\n    uint8_t globalIdentifierIndex);\n\nstruct IbvSetupInformation makeIbvSetupInformation(\n    const IbvAddress& addr,\n    const IbvQueuePair& qp);\n\nvoid transitionIbvQueuePairToInit(\n    const IbvLib& ibvLib,\n    IbvQueuePair& qp,\n    const IbvAddress& selfAddr);\n\nvoid transitionIbvQueuePairToReadyToReceive(\n    const IbvLib& ibvLib,\n    IbvQueuePair& qp,\n    const IbvAddress& selfAddr,\n    const IbvSetupInformation& destinationInfo);\n\nvoid transitionIbvQueuePairToReadyToSend(\n    const IbvLib& ibvLib,\n    IbvQueuePair& qp);\n\nvoid transitionIbvQueuePairToError(const IbvLib& ibvLib, IbvQueuePair& qp);\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/ibv_lib.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/dl.h>\n\nnamespace tensorpipe {\n\n// Master list of all symbols we care about from libibverbs.\n\n#define TP_FORALL_IBV_SYMBOLS(_)                                      \\\n  _(ack_async_event, void, (IbvLib::async_event*))                    \\\n  _(alloc_pd, IbvLib::pd*, (IbvLib::context*))                        \\\n  _(close_device, int, (IbvLib::context*))                            \\\n  _(create_cq,                                                        \\\n    IbvLib::cq*,                                                      \\\n    (IbvLib::context*, int, void*, IbvLib::comp_channel*, int))       \\\n  _(create_qp, IbvLib::qp*, (IbvLib::pd*, IbvLib::qp_init_attr*))     \\\n  _(create_srq, IbvLib::srq*, (IbvLib::pd*, IbvLib::srq_init_attr*))  \\\n  _(dealloc_pd, int, (IbvLib::pd*))                                   \\\n  _(dereg_mr, int, (IbvLib::mr*))                                     \\\n  _(destroy_cq, int, (IbvLib::cq*))                                   \\\n  _(destroy_qp, int, (IbvLib::qp*))                                   \\\n  _(destroy_srq, int, (IbvLib::srq*))                                 \\\n  _(event_type_str, const char*, (IbvLib::event_type))                \\\n  _(free_device_list, void, (IbvLib::device**))                       \\\n  _(get_async_event, int, (IbvLib::context*, IbvLib::async_event*))   \\\n  _(get_device_list, IbvLib::device**, (int*))                        \\\n  _(get_device_name, const char*, (IbvLib::device*))                  \\\n  _(modify_qp, int, (IbvLib::qp*, IbvLib::qp_attr*, int))             \\\n  _(open_device, IbvLib::context*, (IbvLib::device*))                 
\\\n  _(query_gid, int, (IbvLib::context*, uint8_t, int, IbvLib::gid*))   \\\n  _(query_port, int, (IbvLib::context*, uint8_t, IbvLib::port_attr*)) \\\n  _(reg_mr, IbvLib::mr*, (IbvLib::pd*, void*, size_t, int))           \\\n  _(wc_status_str, const char*, (IbvLib::wc_status))\n\n// Wrapper for libibverbs.\n\nclass IbvLib {\n public:\n  // Constants\n\n  enum { SYSFS_NAME_MAX = 64, SYSFS_PATH_MAX = 256 };\n  enum { WC_IP_CSUM_OK_SHIFT = 2 };\n\n  // Enums\n\n  enum access_flags {\n    ACCESS_LOCAL_WRITE = 1,\n    ACCESS_REMOTE_WRITE = (1 << 1),\n    ACCESS_REMOTE_READ = (1 << 2),\n    ACCESS_REMOTE_ATOMIC = (1 << 3),\n    ACCESS_MW_BIND = (1 << 4),\n    ACCESS_ZERO_BASED = (1 << 5),\n    ACCESS_ON_DEMAND = (1 << 6),\n    ACCESS_HUGETLB = (1 << 7),\n    ACCESS_RELAXED_ORDERING = (1 << 20),\n  };\n\n  enum event_type {\n    EVENT_CQ_ERR,\n    EVENT_QP_FATAL,\n    EVENT_QP_REQ_ERR,\n    EVENT_QP_ACCESS_ERR,\n    EVENT_COMM_EST,\n    EVENT_SQ_DRAINED,\n    EVENT_PATH_MIG,\n    EVENT_PATH_MIG_ERR,\n    EVENT_DEVICE_FATAL,\n    EVENT_PORT_ACTIVE,\n    EVENT_PORT_ERR,\n    EVENT_LID_CHANGE,\n    EVENT_PKEY_CHANGE,\n    EVENT_SM_CHANGE,\n    EVENT_SRQ_ERR,\n    EVENT_SRQ_LIMIT_REACHED,\n    EVENT_QP_LAST_WQE_REACHED,\n    EVENT_CLIENT_REREGISTER,\n    EVENT_GID_CHANGE,\n    EVENT_WQ_FATAL,\n  };\n\n  enum mig_state { MIG_MIGRATED, MIG_REARM, MIG_ARMED };\n\n  enum mtu {\n    MTU_256 = 1,\n    MTU_512 = 2,\n    MTU_1024 = 3,\n    MTU_2048 = 4,\n    MTU_4096 = 5\n  };\n\n  enum mw_type { MW_TYPE_1 = 1, MW_TYPE_2 = 2 };\n\n  enum node_type {\n    NODE_UNKNOWN = -1,\n    NODE_CA = 1,\n    NODE_SWITCH,\n    NODE_ROUTER,\n    NODE_RNIC,\n    NODE_USNIC,\n    NODE_USNIC_UDP,\n    NODE_UNSPECIFIED,\n  };\n\n  enum port_state {\n    PORT_NOP = 0,\n    PORT_DOWN = 1,\n    PORT_INIT = 2,\n    PORT_ARMED = 3,\n    PORT_ACTIVE = 4,\n    PORT_ACTIVE_DEFER = 5\n  };\n\n  enum qp_attr_mask {\n    QP_STATE = 1 << 0,\n    QP_CUR_STATE = 1 << 1,\n    QP_EN_SQD_ASYNC_NOTIFY = 1 << 2,\n    
QP_ACCESS_FLAGS = 1 << 3,\n    QP_PKEY_INDEX = 1 << 4,\n    QP_PORT = 1 << 5,\n    QP_QKEY = 1 << 6,\n    QP_AV = 1 << 7,\n    QP_PATH_MTU = 1 << 8,\n    QP_TIMEOUT = 1 << 9,\n    QP_RETRY_CNT = 1 << 10,\n    QP_RNR_RETRY = 1 << 11,\n    QP_RQ_PSN = 1 << 12,\n    QP_MAX_QP_RD_ATOMIC = 1 << 13,\n    QP_ALT_PATH = 1 << 14,\n    QP_MIN_RNR_TIMER = 1 << 15,\n    QP_SQ_PSN = 1 << 16,\n    QP_MAX_DEST_RD_ATOMIC = 1 << 17,\n    QP_PATH_MIG_STATE = 1 << 18,\n    QP_CAP = 1 << 19,\n    QP_DEST_QPN = 1 << 20,\n    QP_RATE_LIMIT = 1 << 25,\n  };\n\n  enum qp_state {\n    QPS_RESET,\n    QPS_INIT,\n    QPS_RTR,\n    QPS_RTS,\n    QPS_SQD,\n    QPS_SQE,\n    QPS_ERR,\n    QPS_UNKNOWN\n  };\n\n  enum qp_type {\n    QPT_RC = 2,\n    QPT_UC,\n    QPT_UD,\n    QPT_RAW_PACKET = 8,\n    QPT_XRC_SEND = 9,\n    QPT_XRC_RECV,\n    QPT_DRIVER = 0xff,\n  };\n\n  enum transport_type {\n    TRANSPORT_UNKNOWN = -1,\n    TRANSPORT_IB = 0,\n    TRANSPORT_IWARP,\n    TRANSPORT_USNIC,\n    TRANSPORT_USNIC_UDP,\n    TRANSPORT_UNSPECIFIED,\n  };\n\n  enum wc_flags {\n    WC_GRH = 1 << 0,\n    WC_WITH_IMM = 1 << 1,\n    WC_IP_CSUM_OK = 1 << WC_IP_CSUM_OK_SHIFT,\n    WC_WITH_INV = 1 << 3,\n    WC_TM_SYNC_REQ = 1 << 4,\n    WC_TM_MATCH = 1 << 5,\n    WC_TM_DATA_VALID = 1 << 6,\n  };\n\n  enum wc_opcode {\n    WC_SEND,\n    WC_RDMA_WRITE,\n    WC_RDMA_READ,\n    WC_COMP_SWAP,\n    WC_FETCH_ADD,\n    WC_BIND_MW,\n    WC_LOCAL_INV,\n    WC_TSO,\n    WC_RECV = 1 << 7,\n    WC_RECV_RDMA_WITH_IMM,\n\n    WC_TM_ADD,\n    WC_TM_DEL,\n    WC_TM_SYNC,\n    WC_TM_RECV,\n    WC_TM_NO_TAG,\n    WC_DRIVER1,\n  };\n\n  enum wc_status {\n    WC_SUCCESS,\n    WC_LOC_LEN_ERR,\n    WC_LOC_QP_OP_ERR,\n    WC_LOC_EEC_OP_ERR,\n    WC_LOC_PROT_ERR,\n    WC_WR_FLUSH_ERR,\n    WC_MW_BIND_ERR,\n    WC_BAD_RESP_ERR,\n    WC_LOC_ACCESS_ERR,\n    WC_REM_INV_REQ_ERR,\n    WC_REM_ACCESS_ERR,\n    WC_REM_OP_ERR,\n    WC_RETRY_EXC_ERR,\n    WC_RNR_RETRY_EXC_ERR,\n    WC_LOC_RDD_VIOL_ERR,\n    WC_REM_INV_RD_REQ_ERR,\n    
WC_REM_ABORT_ERR,\n    WC_INV_EECN_ERR,\n    WC_INV_EEC_STATE_ERR,\n    WC_FATAL_ERR,\n    WC_RESP_TIMEOUT_ERR,\n    WC_GENERAL_ERR,\n    WC_TM_ERR,\n    WC_TM_RNDV_INCOMPLETE,\n  };\n\n  enum wr_opcode {\n    WR_RDMA_WRITE,\n    WR_RDMA_WRITE_WITH_IMM,\n    WR_SEND,\n    WR_SEND_WITH_IMM,\n    WR_RDMA_READ,\n    WR_ATOMIC_CMP_AND_SWP,\n    WR_ATOMIC_FETCH_AND_ADD,\n    WR_LOCAL_INV,\n    WR_BIND_MW,\n    WR_SEND_WITH_INV,\n    WR_TSO,\n    WR_DRIVER1,\n  };\n\n  // Structs and unions\n\n  // Forward declarations\n\n  struct _compat_port_attr;\n  struct ah;\n  struct context;\n  struct cq;\n  struct device;\n  struct mr;\n  struct mw_bind;\n  struct mw;\n  struct pd;\n  struct qp;\n  struct srq;\n  struct wq;\n\n  // Attributes\n\n  struct port_attr {\n    IbvLib::port_state state;\n    IbvLib::mtu max_mtu;\n    IbvLib::mtu active_mtu;\n    int gid_tbl_len;\n    uint32_t port_cap_flags;\n    uint32_t max_msg_sz;\n    uint32_t bad_pkey_cntr;\n    uint32_t qkey_viol_cntr;\n    uint16_t pkey_tbl_len;\n    uint16_t lid;\n    uint16_t sm_lid;\n    uint8_t lmc;\n    uint8_t max_vl_num;\n    uint8_t sm_sl;\n    uint8_t subnet_timeout;\n    uint8_t init_type_reply;\n    uint8_t active_width;\n    uint8_t active_speed;\n    uint8_t phys_state;\n    uint8_t link_layer;\n    uint8_t flags;\n    uint16_t port_cap_flags2;\n  };\n\n  struct qp_cap {\n    uint32_t max_send_wr;\n    uint32_t max_recv_wr;\n    uint32_t max_send_sge;\n    uint32_t max_recv_sge;\n    uint32_t max_inline_data;\n  };\n\n  union gid {\n    uint8_t raw[16];\n    struct {\n      uint64_t subnet_prefix;\n      uint64_t interface_id;\n    } global;\n  };\n\n  struct global_route {\n    IbvLib::gid dgid;\n    uint32_t flow_label;\n    uint8_t sgid_index;\n    uint8_t hop_limit;\n    uint8_t traffic_class;\n  };\n\n  struct ah_attr {\n    IbvLib::global_route grh;\n    uint16_t dlid;\n    uint8_t sl;\n    uint8_t src_path_bits;\n    uint8_t static_rate;\n    uint8_t is_global;\n    uint8_t port_num;\n  };\n\n 
 struct qp_attr {\n    IbvLib::qp_state qp_state;\n    IbvLib::qp_state cur_qp_state;\n    IbvLib::mtu path_mtu;\n    IbvLib::mig_state path_mig_state;\n    uint32_t qkey;\n    uint32_t rq_psn;\n    uint32_t sq_psn;\n    uint32_t dest_qp_num;\n    unsigned int qp_access_flags;\n    IbvLib::qp_cap cap;\n    IbvLib::ah_attr ah_attr;\n    IbvLib::ah_attr alt_ah_attr;\n    uint16_t pkey_index;\n    uint16_t alt_pkey_index;\n    uint8_t en_sqd_async_notify;\n    uint8_t sq_draining;\n    uint8_t max_rd_atomic;\n    uint8_t max_dest_rd_atomic;\n    uint8_t min_rnr_timer;\n    uint8_t port_num;\n    uint8_t timeout;\n    uint8_t retry_cnt;\n    uint8_t rnr_retry;\n    uint8_t alt_port_num;\n    uint8_t alt_timeout;\n    uint32_t rate_limit;\n  };\n\n  struct qp_init_attr {\n    void* qp_context;\n    IbvLib::cq* send_cq;\n    IbvLib::cq* recv_cq;\n    IbvLib::srq* srq;\n    IbvLib::qp_cap cap;\n    IbvLib::qp_type qp_type;\n    int sq_sig_all;\n  };\n\n  struct srq_attr {\n    uint32_t max_wr;\n    uint32_t max_sge;\n    uint32_t srq_limit;\n  };\n\n  struct srq_init_attr {\n    void* srq_context;\n    IbvLib::srq_attr attr;\n  };\n\n  // Work requests and completions\n\n  struct sge {\n    uint64_t addr;\n    uint32_t length;\n    uint32_t lkey;\n  };\n\n  struct recv_wr {\n    uint64_t wr_id;\n    IbvLib::recv_wr* next;\n    IbvLib::sge* sg_list;\n    int num_sge;\n  };\n\n  struct mw_bind_info {\n    IbvLib::mr* mr;\n    uint64_t addr;\n    uint64_t length;\n    unsigned int mw_access_flags;\n  };\n\n  struct send_wr {\n    uint64_t wr_id;\n    IbvLib::send_wr* next;\n    IbvLib::sge* sg_list;\n    int num_sge;\n    IbvLib::wr_opcode opcode;\n    unsigned int send_flags;\n    union {\n      uint32_t imm_data;\n      uint32_t invalidate_rkey;\n    };\n    union {\n      struct {\n        uint64_t remote_addr;\n        uint32_t rkey;\n      } rdma;\n      struct {\n        uint64_t remote_addr;\n        uint64_t compare_add;\n        uint64_t swap;\n        uint32_t 
rkey;\n      } atomic;\n      struct {\n        IbvLib::ah* ah;\n        uint32_t remote_qpn;\n        uint32_t remote_qkey;\n      } ud;\n    } wr;\n    union {\n      struct {\n        uint32_t remote_srqn;\n      } xrc;\n    } qp_type;\n    union {\n      struct {\n        IbvLib::mw* mw;\n        uint32_t rkey;\n        IbvLib::mw_bind_info bind_info;\n      } bind_mw;\n      struct {\n        void* hdr;\n        uint16_t hdr_sz;\n        uint16_t mss;\n      } tso;\n    };\n  };\n\n  struct wc {\n    uint64_t wr_id;\n    IbvLib::wc_status status;\n    IbvLib::wc_opcode opcode;\n    uint32_t vendor_err;\n    uint32_t byte_len;\n    union {\n      uint32_t imm_data;\n      uint32_t invalidated_rkey;\n    };\n    uint32_t qp_num;\n    uint32_t src_qp;\n    unsigned int wc_flags;\n    uint16_t pkey_index;\n    uint16_t slid;\n    uint8_t sl;\n    uint8_t dlid_path_bits;\n  };\n\n  // Main structs\n\n  struct async_event {\n    union {\n      IbvLib::cq* cq;\n      IbvLib::qp* qp;\n      IbvLib::srq* srq;\n      IbvLib::wq* wq;\n      int port_num;\n    } element;\n    IbvLib::event_type event_type;\n  };\n\n  struct comp_channel {\n    IbvLib::context* context;\n    int fd;\n    int refcnt;\n  };\n\n  struct context_ops {\n    void* (*_compat_query_device)(void);\n    int (*_compat_query_port)(\n        IbvLib::context* context,\n        uint8_t port_num,\n        struct IbvLib::_compat_port_attr* port_attr);\n    void* (*_compat_alloc_pd)(void);\n    void* (*_compat_dealloc_pd)(void);\n    void* (*_compat_reg_mr)(void);\n    void* (*_compat_rereg_mr)(void);\n    void* (*_compat_dereg_mr)(void);\n    IbvLib::mw* (*alloc_mw)(IbvLib::pd* pd, IbvLib::mw_type type);\n    int (*bind_mw)(IbvLib::qp* qp, IbvLib::mw* mw, IbvLib::mw_bind* mw_bind);\n    int (*dealloc_mw)(IbvLib::mw* mw);\n    void* (*_compat_create_cq)(void);\n    int (*poll_cq)(IbvLib::cq* cq, int num_entries, IbvLib::wc* wc);\n    int (*req_notify_cq)(IbvLib::cq* cq, int solicited_only);\n    void* 
(*_compat_cq_event)(void);\n    void* (*_compat_resize_cq)(void);\n    void* (*_compat_destroy_cq)(void);\n    void* (*_compat_create_srq)(void);\n    void* (*_compat_modify_srq)(void);\n    void* (*_compat_query_srq)(void);\n    void* (*_compat_destroy_srq)(void);\n    int (*post_srq_recv)(\n        IbvLib::srq* srq,\n        IbvLib::recv_wr* recv_wr,\n        IbvLib::recv_wr** bad_recv_wr);\n    void* (*_compat_create_qp)(void);\n    void* (*_compat_query_qp)(void);\n    void* (*_compat_modify_qp)(void);\n    void* (*_compat_destroy_qp)(void);\n    int (*post_send)(\n        IbvLib::qp* qp,\n        IbvLib::send_wr* wr,\n        IbvLib::send_wr** bad_wr);\n    int (*post_recv)(\n        IbvLib::qp* qp,\n        IbvLib::recv_wr* wr,\n        IbvLib::recv_wr** bad_wr);\n    void* (*_compat_create_ah)(void);\n    void* (*_compat_destroy_ah)(void);\n    void* (*_compat_attach_mcast)(void);\n    void* (*_compat_detach_mcast)(void);\n    void* (*_compat_async_event)(void);\n  };\n\n  struct context {\n    IbvLib::device* device;\n    IbvLib::context_ops ops;\n    int cmd_fd;\n    int async_fd;\n    int num_comp_vectors;\n    pthread_mutex_t mutex;\n    void* abi_compat;\n  };\n\n  struct cq {\n    IbvLib::context* context;\n    IbvLib::comp_channel* channel;\n    void* cq_context;\n    uint32_t handle;\n    int cqe;\n\n    pthread_mutex_t mutex;\n    pthread_cond_t cond;\n    uint32_t comp_events_completed;\n    uint32_t async_events_completed;\n  };\n\n  struct _device_ops {\n    IbvLib::context* (*_dummy1)(IbvLib::device* device, int cmd_fd);\n    void (*_dummy2)(IbvLib::context* context);\n  };\n\n  struct device {\n    IbvLib::_device_ops _ops;\n    IbvLib::node_type node_type;\n    IbvLib::transport_type transport_type;\n    char name[IbvLib::SYSFS_NAME_MAX];\n    char dev_name[IbvLib::SYSFS_NAME_MAX];\n    char dev_path[IbvLib::SYSFS_PATH_MAX];\n    char ibdev_path[IbvLib::SYSFS_PATH_MAX];\n  };\n\n  struct mr {\n    IbvLib::context* context;\n    IbvLib::pd* 
pd;\n    void* addr;\n    size_t length;\n    uint32_t handle;\n    uint32_t lkey;\n    uint32_t rkey;\n  };\n\n  struct pd {\n    IbvLib::context* context;\n    uint32_t handle;\n  };\n\n  struct qp {\n    IbvLib::context* context;\n    void* qp_context;\n    IbvLib::pd* pd;\n    IbvLib::cq* send_cq;\n    IbvLib::cq* recv_cq;\n    IbvLib::srq* srq;\n    uint32_t handle;\n    uint32_t qp_num;\n    IbvLib::qp_state state;\n    IbvLib::qp_type qp_type;\n\n    pthread_mutex_t mutex;\n    pthread_cond_t cond;\n    uint32_t events_completed;\n  };\n\n  struct srq {\n    IbvLib::context* context;\n    void* srq_context;\n    IbvLib::pd* pd;\n    uint32_t handle;\n\n    pthread_mutex_t mutex;\n    pthread_cond_t cond;\n    uint32_t events_completed;\n  };\n\n private:\n  explicit IbvLib(DynamicLibraryHandle dlhandle)\n      : dlhandle_(std::move(dlhandle)) {}\n\n  DynamicLibraryHandle dlhandle_;\n\n#define TP_DECLARE_FIELD(function_name, return_type, args_types) \\\n  return_type(*function_name##_ptr_) args_types = nullptr;\n  TP_FORALL_IBV_SYMBOLS(TP_DECLARE_FIELD)\n#undef TP_DECLARE_FIELD\n\n public:\n  IbvLib() = default;\n\n#define TP_FORWARD_CALL(function_name, return_type, args_types)  \\\n  template <typename... Args>                                    \\\n  auto function_name(Args&&... 
args) const {                     \\\n    return (*function_name##_ptr_)(std::forward<Args>(args)...); \\\n  }\n  TP_FORALL_IBV_SYMBOLS(TP_FORWARD_CALL)\n#undef TP_FORWARD_CALL\n\n  static std::tuple<Error, IbvLib> create() {\n    Error error;\n    DynamicLibraryHandle dlhandle;\n    // To keep things \"neat\" and contained, we open in \"local\" mode (as opposed\n    // to global) so that the ibverbs symbols can only be resolved through this\n    // handle and are not exposed (a.k.a., \"leaked\") to other shared objects.\n    std::tie(error, dlhandle) =\n        DynamicLibraryHandle::create(\"libibverbs.so.1\", RTLD_LOCAL | RTLD_LAZY);\n    if (error) {\n      return std::make_tuple(std::move(error), IbvLib());\n    }\n    // Log at level 9 as we can't know whether this will be used in a transport\n    // or channel, thus err on the side of this being as low-level as possible\n    // because we don't expect this to be of interest that often.\n    TP_VLOG(9) << [&]() -> std::string {\n      std::string filename;\n      std::tie(error, filename) = dlhandle.getFilename();\n      if (error) {\n        return \"Couldn't determine location of shared library libibverbs.so.1: \" +\n            error.what();\n      }\n      return \"Found shared library libibverbs.so.1 at \" + filename;\n    }();\n    IbvLib lib(std::move(dlhandle));\n#define TP_LOAD_SYMBOL(function_name, return_type, args_types)              \\\n  {                                                                         \\\n    void* ptr;                                                              \\\n    std::tie(error, ptr) = lib.dlhandle_.loadSymbol(\"ibv_\" #function_name); \\\n    if (error) {                                                            \\\n      return std::make_tuple(std::move(error), IbvLib());                   \\\n    }                                                                       \\\n    TP_THROW_ASSERT_IF(ptr == nullptr);                                     \\\n    
lib.function_name##_ptr_ =                                              \\\n        reinterpret_cast<decltype(function_name##_ptr_)>(ptr);              \\\n  }\n    TP_FORALL_IBV_SYMBOLS(TP_LOAD_SYMBOL)\n#undef TP_LOAD_SYMBOL\n    return std::make_tuple(Error::kSuccess, std::move(lib));\n  }\n\n  // These functions (which, it would seem, are the ones that are used in the\n  // critical control path, and which thus must have the lowest latency and\n  // avoid any syscall/kernel overhead) are not exposed as symbols of\n  // libibverbs.so: they are defined inline in the header and, in fact, they\n  // access a function pointer stored on the ibv_context and execute it.\n\n  int poll_cq(IbvLib::cq* cq, int num_entries, IbvLib::wc* wc) const {\n    return cq->context->ops.poll_cq(cq, num_entries, wc);\n  }\n\n  int post_send(IbvLib::qp* qp, IbvLib::send_wr* wr, IbvLib::send_wr** bad_wr)\n      const {\n    return qp->context->ops.post_send(qp, wr, bad_wr);\n  }\n\n  int post_recv(IbvLib::qp* qp, IbvLib::recv_wr* wr, IbvLib::recv_wr** bad_wr)\n      const {\n    return qp->context->ops.post_recv(qp, wr, bad_wr);\n  }\n\n  int post_srq_recv(\n      IbvLib::srq* srq,\n      IbvLib::recv_wr* recv_wr,\n      IbvLib::recv_wr** bad_recv_wr) const {\n    return srq->context->ops.post_srq_recv(srq, recv_wr, bad_recv_wr);\n  }\n};\n\n#undef TP_FORALL_IBV_SYMBOLS\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/memory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sys/mman.h>\n\n#include <cstdint>\n#include <memory>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/error_macros.h>\n\nnamespace tensorpipe {\n\nclass MmappedPtr {\n  MmappedPtr(uint8_t* ptr, size_t length) {\n    ptr_ = decltype(ptr_)(ptr, Deleter{length});\n  }\n\n public:\n  MmappedPtr() = default;\n\n  static std::tuple<Error, MmappedPtr> create(\n      size_t length,\n      int prot,\n      int flags,\n      int fd) {\n    void* ptr;\n    ptr = ::mmap(nullptr, length, prot, flags, fd, 0);\n    if (ptr == MAP_FAILED) {\n      return std::make_tuple(\n          TP_CREATE_ERROR(SystemError, \"mmap\", errno), MmappedPtr());\n    }\n    return std::make_tuple(\n        Error::kSuccess, MmappedPtr(reinterpret_cast<uint8_t*>(ptr), length));\n  }\n\n  uint8_t* ptr() {\n    return ptr_.get();\n  }\n\n  const uint8_t* ptr() const {\n    return ptr_.get();\n  }\n\n  size_t getLength() const {\n    return ptr_.get_deleter().length;\n  }\n\n  void reset() {\n    ptr_.reset();\n  }\n\n private:\n  struct Deleter {\n    size_t length;\n\n    void operator()(void* ptr) {\n      int ret = ::munmap(ptr, length);\n      TP_THROW_SYSTEM_IF(ret != 0, errno);\n    }\n  };\n\n  std::unique_ptr<uint8_t[], Deleter> ptr_{nullptr, Deleter{}};\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/nop.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <nop/serializer.h>\n#include <nop/status.h>\n#include <nop/utility/buffer_reader.h>\n#include <nop/utility/buffer_writer.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\n\n// Libnop makes heavy use of templates, whereas TensorPipe is designed around\n// polymorphism (abstract interfaces and concrete derived classes). The two\n// don't mix well: for example, one can't have virtual method templates. One\n// technique to get around this is type erasure, which is however tricky to get\n// right because the \"fundamental\" operation(s) of libnop, (de)serialization,\n// are simultaneously templated on two types: the reader/writer and the object.\n// Ideally we'd like for both these sets of types to be dynamically extensible,\n// as we want to allow transports to provide their own specialized readers and\n// writers, and channels could have their own custom objects that they want to\n// (de)serialize. 
New transports and channel could be implemented by third\n// parties and plugged in at runtime, so the sets of reader/writers and of\n// objects that we must support can't be known in advance.\n\n// We had originally found a solution to this pickle by doing two type erasures\n// one after the other, first on the reader/writer, which deals with bytes and\n// not objects and is thus not templated, and then on objects, leveraging the\n// fact that there is one libnop (de)serializer that takes a *pointer* to a\n// reader/writer giving us a \"hook\" on which to do polymorphism, by hardcoding a\n// pointer to the base reader/writer class as template parameter, but then\n// passing in an instance of a concrete subclass at runtime.\n\n// However it turned out that this performed poorly, apparently due to the\n// (de)serialization process consisting in many small calls to the reader/writer\n// which each had to perform a vtable lookup. So, instead, we decided to not\n// allow transports to utilize custom specialized readers/writers and to provide\n// a single global reader/writer class that is able to cover the two main usage\n// patterns we think are most likely to come up: reading/writing to a temporary\n// contiguous buffer, and reading/writing to a ringbuffer.\n\n// This reader and writer can operate either on one single buffer (ptr + len) or\n// on two buffers: in the latter case, they first consume the first one and,\n// when that fills up, they \"spill over\" into the second one. 
This is needed in\n// order to support the \"wrap around\" point in ringbuffers.\n\nclass NopReader final {\n public:\n  NopReader(const uint8_t* ptr, size_t len) : ptr1_(ptr), len1_(len) {}\n\n  NopReader(const uint8_t* ptr1, size_t len1, const uint8_t* ptr2, size_t len2)\n      : ptr1_(ptr1), len1_(len1), ptr2_(ptr2), len2_(len2) {}\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  nop::Status<void> Ensure(size_t size) {\n    if (likely(size <= len1_ + len2_)) {\n      return nop::ErrorStatus::None;\n    } else {\n      return nop::ErrorStatus::ReadLimitReached;\n    }\n  }\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  nop::Status<void> Read(uint8_t* byte) {\n    if (unlikely(len1_ == 0)) {\n      ptr1_ = ptr2_;\n      len1_ = len2_;\n      ptr2_ = nullptr;\n      len2_ = 0;\n    }\n\n    *byte = *ptr1_;\n    ptr1_++;\n    len1_--;\n    return nop::ErrorStatus::None;\n  }\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  nop::Status<void> Read(void* begin, void* end) {\n    size_t size =\n        reinterpret_cast<uint8_t*>(end) - reinterpret_cast<uint8_t*>(begin);\n\n    if (unlikely(len1_ < size)) {\n      std::memcpy(begin, ptr1_, len1_);\n      begin = reinterpret_cast<uint8_t*>(begin) + len1_;\n      size -= len1_;\n      ptr1_ = ptr2_;\n      len1_ = len2_;\n      ptr2_ = nullptr;\n      len2_ = 0;\n    }\n\n    std::memcpy(begin, ptr1_, size);\n    ptr1_ += size;\n    len1_ -= size;\n    return nop::ErrorStatus::None;\n  }\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  nop::Status<void> Skip(size_t paddingBytes) {\n    if (unlikely(len1_ < paddingBytes)) {\n      paddingBytes -= len1_;\n      ptr1_ = ptr2_;\n      len1_ = len2_;\n      ptr2_ = nullptr;\n      len2_ = 0;\n    }\n\n    ptr1_ += paddingBytes;\n    len1_ -= paddingBytes;\n    return nop::ErrorStatus::None;\n  }\n\n private:\n  const uint8_t* ptr1_ = nullptr;\n  size_t len1_ = 0;\n  const uint8_t* ptr2_ = nullptr;\n  size_t len2_ = 0;\n};\n\nclass NopWriter 
final {\n public:\n  NopWriter(uint8_t* ptr, size_t len) : ptr1_(ptr), len1_(len) {}\n  NopWriter(uint8_t* ptr1, size_t len1, uint8_t* ptr2, size_t len2)\n      : ptr1_(ptr1), len1_(len1), ptr2_(ptr2), len2_(len2) {}\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  nop::Status<void> Prepare(size_t size) {\n    if (likely(size <= len1_ + len2_)) {\n      return nop::ErrorStatus::None;\n    } else {\n      return nop::ErrorStatus::WriteLimitReached;\n    }\n  }\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  nop::Status<void> Write(uint8_t byte) {\n    if (unlikely(len1_ == 0)) {\n      ptr1_ = ptr2_;\n      len1_ = len2_;\n      ptr2_ = nullptr;\n      len2_ = 0;\n    }\n\n    *ptr1_ = byte;\n    ptr1_++;\n    len1_--;\n    return nop::ErrorStatus::None;\n  }\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  nop::Status<void> Write(const void* begin, const void* end) {\n    size_t size = reinterpret_cast<const uint8_t*>(end) -\n        reinterpret_cast<const uint8_t*>(begin);\n\n    if (unlikely(len1_ < size)) {\n      std::memcpy(ptr1_, begin, len1_);\n      begin = reinterpret_cast<const uint8_t*>(begin) + len1_;\n      size -= len1_;\n      ptr1_ = ptr2_;\n      len1_ = len2_;\n      ptr2_ = nullptr;\n      len2_ = 0;\n    }\n\n    std::memcpy(ptr1_, begin, size);\n    ptr1_ += size;\n    len1_ -= size;\n    return nop::ErrorStatus::None;\n  }\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  nop::Status<void> Skip(size_t paddingBytes, uint8_t paddingValue) {\n    if (unlikely(len1_ < paddingBytes)) {\n      std::memset(ptr1_, paddingValue, len1_);\n      paddingBytes -= len1_;\n      ptr1_ = ptr2_;\n      len1_ = len2_;\n      ptr2_ = nullptr;\n      len2_ = 0;\n    }\n\n    std::memset(ptr1_, paddingValue, paddingBytes);\n    ptr1_ += paddingBytes;\n    len1_ -= paddingBytes;\n    return nop::ErrorStatus::None;\n  }\n\n private:\n  uint8_t* ptr1_ = nullptr;\n  size_t len1_ = 0;\n  uint8_t* ptr2_ = nullptr;\n  size_t len2_ = 
0;\n};\n\n// The helpers to perform type erasure of the object type: a untemplated base\n// class exposing the methods we need for (de)serialization, and then templated\n// subclasses allowing to create a holder for each concrete libnop type.\n\nclass AbstractNopHolder {\n public:\n  virtual size_t getSize() const = 0;\n  virtual nop::Status<void> write(NopWriter& writer) const = 0;\n  virtual nop::Status<void> read(NopReader& reader) = 0;\n  virtual ~AbstractNopHolder() = default;\n};\n\ntemplate <typename T>\nclass NopHolder : public AbstractNopHolder {\n public:\n  T& getObject() {\n    return object_;\n  }\n\n  const T& getObject() const {\n    return object_;\n  }\n\n  size_t getSize() const override {\n    return nop::Encoding<T>::Size(object_);\n  }\n\n  nop::Status<void> write(NopWriter& writer) const override {\n    return nop::Encoding<T>::Write(object_, &writer);\n  }\n\n  nop::Status<void> read(NopReader& reader) override {\n    return nop::Encoding<T>::Read(&object_, &reader);\n  }\n\n private:\n  T object_;\n};\n\n} // namespace tensorpipe\n\nnamespace nop {\n\n// The `nop::Encoding` specialization for `tensorpipe::optional` was inspired\n// by that of `nop::Optional`, available here:\n// https://github.com/google/libnop/blob/master/include/nop/base/optional.h\ntemplate <typename T>\nstruct Encoding<tensorpipe::optional<T>> : EncodingIO<tensorpipe::optional<T>> {\n  using Type = tensorpipe::optional<T>;\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  static constexpr EncodingByte Prefix(const Type& value) {\n    return value ? Encoding<T>::Prefix(value.value()) : EncodingByte::Nil;\n  }\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  static constexpr std::size_t Size(const Type& value) {\n    return value ? 
Encoding<T>::Size(value.value())\n                 : BaseEncodingSize(EncodingByte::Nil);\n  }\n\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  static constexpr bool Match(EncodingByte prefix) {\n    return prefix == EncodingByte::Nil || Encoding<T>::Match(prefix);\n  }\n\n  template <typename Writer>\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  static constexpr Status<void> WritePayload(\n      EncodingByte prefix,\n      const Type& value,\n      Writer* writer) {\n    if (value) {\n      return Encoding<T>::WritePayload(prefix, value.value(), writer);\n    } else {\n      return {};\n    }\n  }\n\n  template <typename Reader>\n  // NOLINTNEXTLINE(readability-identifier-naming)\n  static constexpr Status<void> ReadPayload(\n      EncodingByte prefix,\n      Type* value,\n      Reader* reader) {\n    if (prefix == EncodingByte::Nil) {\n      value->reset();\n    } else {\n      T temp;\n      auto status = Encoding<T>::ReadPayload(prefix, &temp, reader);\n      if (!status) {\n        return status;\n      }\n\n      *value = std::move(temp);\n    }\n\n    return {};\n  }\n};\n\n} // namespace nop\n"
  },
  {
    "path": "tensorpipe/common/nvml_lib.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <nvml.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/dl.h>\n\n#define TP_NVML_CHECK(nvml_lib, a)                                \\\n  do {                                                            \\\n    nvmlReturn_t error = (a);                                     \\\n    if (error != NVML_SUCCESS) {                                  \\\n      const char* errorStr;                                       \\\n      errorStr = (nvml_lib).errorString(error);                   \\\n      TP_THROW_ASSERT() << __TP_EXPAND_OPD(a) << \" \" << errorStr; \\\n    }                                                             \\\n  } while (false)\n\nnamespace tensorpipe {\n\n// Master list of all symbols we care about from libnvidia-ml.\n\n#define TP_FORALL_NVML_SYMBOLS(_)                                             \\\n  _(deviceGetComputeRunningProcesses,                                         \\\n    nvmlDeviceGetComputeRunningProcesses,                                     \\\n    nvmlReturn_t,                                                             \\\n    (nvmlDevice_t, unsigned int*, nvmlProcessInfo_t*))                        \\\n  _(deviceGetCount_v2, nvmlDeviceGetCount_v2, nvmlReturn_t, (unsigned int*))  \\\n  _(deviceGetHandleByIndex_v2,                                                \\\n    nvmlDeviceGetHandleByIndex_v2,                                            \\\n    nvmlReturn_t,                                                             \\\n    (unsigned int, nvmlDevice_t*))                                            \\\n  _(deviceGetHandleByUUID,                                                    \\\n    nvmlDeviceGetHandleByUUID,                
                                \\\n    nvmlReturn_t,                                                             \\\n    (const char*, nvmlDevice_t*))                                             \\\n  _(deviceGetP2PStatus,                                                       \\\n    nvmlDeviceGetP2PStatus,                                                   \\\n    nvmlReturn_t,                                                             \\\n    (nvmlDevice_t, nvmlDevice_t, nvmlGpuP2PCapsIndex_t, nvmlGpuP2PStatus_t*)) \\\n  _(deviceGetUUID,                                                            \\\n    nvmlDeviceGetUUID,                                                        \\\n    nvmlReturn_t,                                                             \\\n    (nvmlDevice_t, char*, unsigned int))                                      \\\n  _(errorString, nvmlErrorString, const char*, (nvmlReturn_t))                \\\n  _(init_v2, nvmlInit_v2, nvmlReturn_t, ())                                   \\\n  _(shutdown, nvmlShutdown, nvmlReturn_t, ())\n\n// Wrapper for libnvidia-ml.\n\nclass NvmlLib {\n private:\n  explicit NvmlLib(DynamicLibraryHandle dlhandle)\n      : dlhandle_(std::move(dlhandle)) {}\n\n  DynamicLibraryHandle dlhandle_;\n  bool inited_ = false;\n\n#define TP_DECLARE_FIELD(method_name, function_name, return_type, args_types) \\\n  return_type(*function_name##_ptr_) args_types = nullptr;\n  TP_FORALL_NVML_SYMBOLS(TP_DECLARE_FIELD)\n#undef TP_DECLARE_FIELD\n\n public:\n  NvmlLib() = default;\n\n  // Implement another RAII layer (on top of the one of DynamicLibraryHandle) to\n  // deal with nvmlInit_v2 and nvmlShutdown. 
The default move assignment would\n  // fail to shutdown NVML when another instance is moved into it, and it would\n  // cause the destructor to shutdown a moved-out instance.\n  NvmlLib(const NvmlLib&) = delete;\n  NvmlLib& operator=(const NvmlLib&) = delete;\n  NvmlLib(NvmlLib&& other) {\n    *this = std::move(other);\n  }\n  NvmlLib& operator=(NvmlLib&& other) {\n    std::swap(dlhandle_, other.dlhandle_);\n    std::swap(inited_, other.inited_);\n#define TP_SWAP_FIELD(method_name, function_name, return_type, args_types) \\\n  std::swap(function_name##_ptr_, other.function_name##_ptr_);\n    TP_FORALL_NVML_SYMBOLS(TP_SWAP_FIELD)\n#undef TP_SWAP_FIELD\n    return *this;\n  }\n\n#define TP_FORWARD_CALL(method_name, function_name, return_type, args_types) \\\n  template <typename... Args>                                                \\\n  auto method_name(Args&&... args) const {                                   \\\n    return (*function_name##_ptr_)(std::forward<Args>(args)...);             \\\n  }\n  TP_FORALL_NVML_SYMBOLS(TP_FORWARD_CALL)\n#undef TP_FORWARD_CALL\n\n  static std::tuple<Error, NvmlLib> create() {\n    Error error;\n    DynamicLibraryHandle dlhandle;\n    // To keep things \"neat\" and contained, we open in \"local\" mode (as\n    // opposed to global) so that the cuda symbols can only be resolved\n    // through this handle and are not exposed (a.k.a., \"leaked\") to other\n    // shared objects.\n    std::tie(error, dlhandle) = DynamicLibraryHandle::create(\n        \"libnvidia-ml.so.1\", RTLD_LOCAL | RTLD_LAZY);\n    if (error) {\n      return std::make_tuple(std::move(error), NvmlLib());\n    }\n    // Log at level 9 as we can't know whether this will be used in a transport\n    // or channel, thus err on the side of this being as low-level as possible\n    // because we don't expect this to be of interest that often.\n    TP_VLOG(9) << [&]() -> std::string {\n      std::string filename;\n      std::tie(error, filename) = 
dlhandle.getFilename();\n      if (error) {\n        return \"Couldn't determine location of shared library libnvidia-ml.so.1: \" +\n            error.what();\n      }\n      return \"Found shared library libnvidia-ml.so.1 at \" + filename;\n    }();\n    NvmlLib lib(std::move(dlhandle));\n#define TP_LOAD_SYMBOL(method_name, function_name, return_type, args_types) \\\n  {                                                                         \\\n    void* ptr;                                                              \\\n    std::tie(error, ptr) = lib.dlhandle_.loadSymbol(#function_name);        \\\n    if (error) {                                                            \\\n      return std::make_tuple(std::move(error), NvmlLib());                  \\\n    }                                                                       \\\n    TP_THROW_ASSERT_IF(ptr == nullptr);                                     \\\n    lib.function_name##_ptr_ =                                              \\\n        reinterpret_cast<decltype(function_name##_ptr_)>(ptr);              \\\n  }\n    TP_FORALL_NVML_SYMBOLS(TP_LOAD_SYMBOL)\n#undef TP_LOAD_SYMBOL\n    TP_NVML_CHECK(lib, lib.init_v2());\n    lib.inited_ = true;\n    return std::make_tuple(Error::kSuccess, std::move(lib));\n  }\n\n  ~NvmlLib() {\n    if (inited_) {\n      TP_DCHECK(dlhandle_.hasValue());\n      TP_NVML_CHECK(*this, shutdown());\n    }\n  }\n};\n\n#undef TP_FORALL_NVML_SYMBOLS\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/optional.h",
    "content": "#pragma once\n\n#include <optional>\n\nnamespace tensorpipe {\n\nusing std::optional;\nusing std::nullopt;\n\n} // namespace tensorpipe\n\n"
  },
  {
    "path": "tensorpipe/common/queue.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <condition_variable>\n#include <deque>\n#include <mutex>\n\nnamespace tensorpipe {\n\ntemplate <typename T>\nclass Queue {\n public:\n  explicit Queue(int capacity = 1) : capacity_(capacity) {}\n\n  void push(T t) {\n    std::unique_lock<std::mutex> lock(mutex_);\n    while (items_.size() >= capacity_) {\n      cv_.wait(lock);\n    }\n    items_.push_back(std::move(t));\n    cv_.notify_all();\n  }\n\n  T pop() {\n    std::unique_lock<std::mutex> lock(mutex_);\n    while (items_.size() == 0) {\n      cv_.wait(lock);\n    }\n    T t(std::move(items_.front()));\n    items_.pop_front();\n    cv_.notify_all();\n    return t;\n  }\n\n private:\n  std::mutex mutex_;\n  std::condition_variable cv_;\n  const int capacity_;\n  std::deque<T> items_;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/ringbuffer.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sys/types.h>\n\n#include <atomic>\n#include <cstring>\n#include <memory>\n#include <type_traits>\n\n#include <tensorpipe/common/system.h>\n\n///\n/// C++17 implementation of shared-memory friendly perf_event style ringbuffer.\n/// It's designed to avoid parallel access and provide (almost) zero-copy\n///\n///\n/// A ringbuffer has a header and a data members that can be allocated\n/// independently from the ringbuffer object, allowing the ringbuffer object\n/// to be stored in process' exclusive memory while header and data\n/// could be in shared memory.\n///\n/// Multiple ringbuffers can reference the same header + data.\n///\n/// Multiple producers (or consumers) can reference the same ringbuffer.\n///\n/// Synchronization between all producers/consumers of all ringbuffers that\n/// reference the same header + pair pairs is done using atomic operations\n/// care is taken to guarantee lock-free implementations, reduce the usage\n/// of LOCK prefixes and the access to non-exclusive cache lines by CPUs.\n///\n/// Producers write data atomically at ringbuffer's head, while Consumers\n/// write data atomically at ringbuffer's tail.\n///\n\nnamespace tensorpipe {\n\n///\n/// RingBufferHeader contains the head, tail and other control information\n/// of the RingBuffer.\n///\n/// <kMinByteSize_> is the minimum byte size of the circular buffer. The actual\n/// size is the smallest power of 2 larger than kMinByteSize_. 
Enforcing the\n/// size to be a power of two avoids costly division/modulo operations.\n///\ntemplate <int NumRoles>\nclass RingBufferHeader {\n public:\n  static_assert(NumRoles > 0, \"\");\n  const uint64_t kDataPoolByteSize;\n  const uint64_t kDataModMask;\n\n  RingBufferHeader(const RingBufferHeader&) = delete;\n  RingBufferHeader(RingBufferHeader&&) = delete;\n\n  // Implementation uses power of 2 arithmetic to avoid costly modulo.\n  // So build the largest RingBuffer with size of the smallest power of 2 >=\n  // <byte_size>.\n  explicit RingBufferHeader(uint64_t minDataByteSize)\n      : kDataPoolByteSize{nextPow2(minDataByteSize)},\n        kDataModMask{kDataPoolByteSize - 1} {\n    // Minimum size where implementation of bit shift arithmetic works.\n    TP_DCHECK_GE(kDataPoolByteSize, 2)\n        << \"Minimum supported ringbuffer data size is 2 bytes\";\n    TP_DCHECK(isPow2(kDataPoolByteSize))\n        << kDataPoolByteSize << \" is not a power of 2\";\n    TP_DCHECK_LE(kDataPoolByteSize, std::numeric_limits<int>::max())\n        << \"Logic piggy-backs read/write size on ints, to be safe forbid\"\n           \" buffer to ever be larger than what an int can hold\";\n    for (int roleIdx = 0; roleIdx < NumRoles; ++roleIdx) {\n      inTx_[roleIdx].clear();\n      markers_[roleIdx] = 0;\n    }\n  }\n\n  // Being in a transaction (either a read or a write one) gives a user of the\n  // ringbuffer (either a consumer or a producer, respectively) the right to\n  // read the head and tail and to modify the one they are responsible for (the\n  // tail and the head, respectively). Accessing the head or tail outside of a\n  // transaction could lead to races. This also means we need memory barriers\n  // around a transaction, to make sure side-effects of other users are visible\n  // upon entering and our side effects become visible to others upon exiting.\n  // We also must prevent the compiler from reordering memory accesses. 
Failure\n  // to do so may result in our reads of head/tail to look like they occurred\n  // before we entered the transaction, and writes to them to look like they\n  // occurred after we exited it. In order to get the desired behavior, we use\n  // the acquire memory order when starting a transaction (which means no later\n  // memory access can be moved before it) and the release memory order when\n  // ending it (no earlier memory access can be moved after it).\n\n  template <int RoleIdx>\n  [[nodiscard]] bool beginTransaction() {\n    static_assert(0 <= RoleIdx && RoleIdx < NumRoles, \"\");\n    return inTx_[RoleIdx].test_and_set(std::memory_order_acquire);\n  }\n\n  template <int RoleIdx>\n  void endTransaction() {\n    static_assert(0 <= RoleIdx && RoleIdx < NumRoles, \"\");\n    inTx_[RoleIdx].clear(std::memory_order_release);\n  }\n\n  // Reading the head and tail is what gives a user of the ringbuffer (either a\n  // consumer or a producer) the right to access the buffer's contents: the\n  // producer can write on [head, tail) (modulo the size), the consumer can read\n  // from [tail, head). And, when the producer increases the head, or when the\n  // consumer increases the tail, they give users of the opposite type the right\n  // to access some of the memory that was previously under their control. 
Thus,\n  // just like we do for the transactions, we need memory barriers around reads\n  // and writes to the head and tail, with the same reasoning for memory orders.\n\n  template <int RoleIdx>\n  uint64_t readMarker() const {\n    static_assert(0 <= RoleIdx && RoleIdx < NumRoles, \"\");\n    return markers_[RoleIdx].load(std::memory_order_acquire);\n  }\n\n  template <int RoleIdx>\n  void incMarker(uint64_t inc) {\n    static_assert(0 <= RoleIdx && RoleIdx < NumRoles, \"\");\n    markers_[RoleIdx].fetch_add(inc, std::memory_order_release);\n  }\n\n protected:\n  std::array<std::atomic_flag, NumRoles> inTx_;\n  std::array<std::atomic<uint64_t>, NumRoles> markers_;\n\n  // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2427.html#atomics.lockfree\n  // static_assert(\n  //     decltype(markers_)::value_type::is_always_lock_free,\n  //     \"Only lock-free atomics are supported\");\n};\n\n///\n/// Process' view of a ring buffer.\n/// This cannot reside in shared memory since it has pointers.\n///\ntemplate <int NumRoles>\nclass RingBuffer final {\n public:\n  RingBuffer() = default;\n\n  RingBuffer(RingBufferHeader<NumRoles>* header, uint8_t* data)\n      : header_(header), data_(data) {\n    TP_THROW_IF_NULLPTR(header_) << \"Header cannot be nullptr\";\n    TP_THROW_IF_NULLPTR(data_) << \"Data cannot be nullptr\";\n  }\n\n  const RingBufferHeader<NumRoles>& getHeader() const {\n    return *header_;\n  }\n\n  RingBufferHeader<NumRoles>& getHeader() {\n    return *header_;\n  }\n\n  const uint8_t* getData() const {\n    return data_;\n  }\n\n  uint8_t* getData() {\n    return data_;\n  }\n\n protected:\n  RingBufferHeader<NumRoles>* header_ = nullptr;\n  uint8_t* data_ = nullptr;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/ringbuffer_read_write_ops.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <array>\n#include <functional>\n#include <memory>\n#include <tuple>\n#include <utility>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/nop.h>\n#include <tensorpipe/common/ringbuffer_role.h>\n\nnamespace tensorpipe {\n\n// Reads happen only if the user supplied a callback (and optionally\n// a destination buffer). The callback is run from the event loop\n// thread upon receiving a notification from our peer.\n//\n// The memory pointer argument to the callback is valid only for the\n// duration of the callback. If the memory contents must be\n// preserved for longer, it must be copied elsewhere.\n//\nclass RingbufferReadOperation {\n  enum Mode {\n    READ_LENGTH,\n    READ_PAYLOAD,\n  };\n\n public:\n  using read_callback_fn =\n      std::function<void(const Error& error, const void* ptr, size_t len)>;\n  // Read into a user-provided buffer of known length.\n  inline RingbufferReadOperation(void* ptr, size_t len, read_callback_fn fn);\n  // Read into an auto-allocated buffer, whose length is read from the wire.\n  explicit inline RingbufferReadOperation(read_callback_fn fn);\n  // Read into a user-provided libnop object, read length from the wire.\n  inline RingbufferReadOperation(\n      AbstractNopHolder* nopObject,\n      read_callback_fn fn);\n\n  // Processes a pending read.\n  template <int NumRoles, int RoleIdx>\n  inline size_t handleRead(RingBufferRole<NumRoles, RoleIdx>& inbox);\n\n  bool completed() const {\n    return (mode_ == READ_PAYLOAD && bytesRead_ == len_);\n  }\n\n  inline void handleError(const Error& error);\n\n private:\n  Mode mode_{READ_LENGTH};\n  void* ptr_{nullptr};\n  AbstractNopHolder* nopObject_{nullptr};\n  
std::unique_ptr<uint8_t[]> buf_;\n  size_t len_{0};\n  size_t bytesRead_{0};\n  read_callback_fn fn_;\n  // Use a separate flag, rather than checking if ptr_ == nullptr, to catch the\n  // case of a user explicitly passing in a nullptr with length zero, in which\n  // case we must check that the length matches the header we see on the wire.\n  const bool ptrProvided_;\n\n  template <int NumRoles, int RoleIdx>\n  inline ssize_t readNopObject(RingBufferRole<NumRoles, RoleIdx>& inbox);\n};\n\n// Writes happen only if the user supplied a memory pointer, the\n// number of bytes to write, and a callback to execute upon\n// completion of the write.\n//\n// The memory pointed to by the pointer may only be reused or freed\n// after the callback has been called.\n//\nclass RingbufferWriteOperation {\n  enum Mode {\n    WRITE_LENGTH,\n    WRITE_PAYLOAD,\n  };\n\n public:\n  using write_callback_fn = std::function<void(const Error& error)>;\n  // Write from a user-provided buffer of known length.\n  inline RingbufferWriteOperation(\n      const void* ptr,\n      size_t len,\n      write_callback_fn fn);\n  // Write from a user-provided libnop object.\n  inline RingbufferWriteOperation(\n      const AbstractNopHolder* nopObject,\n      write_callback_fn fn);\n\n  template <int NumRoles, int RoleIdx>\n  inline size_t handleWrite(RingBufferRole<NumRoles, RoleIdx>& outbox);\n\n  bool completed() const {\n    return (mode_ == WRITE_PAYLOAD && bytesWritten_ == len_);\n  }\n\n  inline void handleError(const Error& error);\n\n private:\n  Mode mode_{WRITE_LENGTH};\n  const void* ptr_{nullptr};\n  const AbstractNopHolder* nopObject_{nullptr};\n  size_t len_{0};\n  size_t bytesWritten_{0};\n  write_callback_fn fn_;\n\n  template <int NumRoles, int RoleIdx>\n  inline ssize_t writeNopObject(RingBufferRole<NumRoles, RoleIdx>& outbox);\n};\n\nRingbufferReadOperation::RingbufferReadOperation(\n    void* ptr,\n    size_t len,\n    read_callback_fn fn)\n    : ptr_(ptr), len_(len), 
fn_(std::move(fn)), ptrProvided_(true) {}\n\nRingbufferReadOperation::RingbufferReadOperation(read_callback_fn fn)\n    : fn_(std::move(fn)), ptrProvided_(false) {}\n\nRingbufferReadOperation::RingbufferReadOperation(\n    AbstractNopHolder* nopObject,\n    read_callback_fn fn)\n    : nopObject_(nopObject), fn_(std::move(fn)), ptrProvided_(false) {}\n\ntemplate <int NumRoles, int RoleIdx>\nsize_t RingbufferReadOperation::handleRead(\n    RingBufferRole<NumRoles, RoleIdx>& inbox) {\n  ssize_t ret;\n  size_t bytesReadNow = 0;\n\n  // Start read transaction. This end of the connection is the only consumer for\n  // this ringbuffer, and all reads are done from the reactor thread, so there\n  // cannot be another transaction already going on. Fail hard in case.\n  ret = inbox.startTx();\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  if (mode_ == READ_LENGTH) {\n    uint32_t length;\n    ret = inbox.template readInTx</*AllowPartial=*/false>(\n        &length, sizeof(length));\n    if (likely(ret >= 0)) {\n      mode_ = READ_PAYLOAD;\n      bytesReadNow += ret;\n      if (nopObject_ != nullptr) {\n        len_ = length;\n      } else if (ptrProvided_) {\n        TP_DCHECK_EQ(length, len_);\n      } else {\n        len_ = length;\n        buf_ = std::make_unique<uint8_t[]>(len_);\n        ptr_ = buf_.get();\n      }\n    } else if (unlikely(ret != -ENODATA)) {\n      TP_THROW_SYSTEM(-ret);\n    }\n  }\n\n  if (mode_ == READ_PAYLOAD) {\n    if (nopObject_ != nullptr) {\n      ret = readNopObject(inbox);\n    } else {\n      ret = inbox.template readInTx</*AllowPartial=*/true>(\n          reinterpret_cast<uint8_t*>(ptr_) + bytesRead_, len_ - bytesRead_);\n    }\n    if (likely(ret >= 0)) {\n      bytesRead_ += ret;\n      bytesReadNow += ret;\n    } else if (unlikely(ret != -ENODATA)) {\n      TP_THROW_SYSTEM(-ret);\n    }\n  }\n\n  ret = inbox.commitTx();\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  if (completed()) {\n    fn_(Error::kSuccess, ptr_, len_);\n  }\n\n  return 
bytesReadNow;\n}\n\ntemplate <int NumRoles, int RoleIdx>\nssize_t RingbufferReadOperation::readNopObject(\n    RingBufferRole<NumRoles, RoleIdx>& inbox) {\n  TP_THROW_ASSERT_IF(len_ > inbox.getSize());\n\n  ssize_t numBuffers;\n  std::array<typename RingBufferRole<NumRoles, RoleIdx>::Buffer, 2> buffers;\n  std::tie(numBuffers, buffers) =\n      inbox.template accessContiguousInTx</*AllowPartial=*/false>(len_);\n  if (unlikely(numBuffers < 0)) {\n    return numBuffers;\n  }\n\n  NopReader reader(\n      buffers[0].ptr, buffers[0].len, buffers[1].ptr, buffers[1].len);\n  nop::Status<void> status = nopObject_->read(reader);\n  if (status.error() == nop::ErrorStatus::ReadLimitReached) {\n    return -ENODATA;\n  } else if (status.has_error()) {\n    return -EINVAL;\n  }\n\n  return len_;\n}\n\nvoid RingbufferReadOperation::handleError(const Error& error) {\n  fn_(error, nullptr, 0);\n}\n\nRingbufferWriteOperation::RingbufferWriteOperation(\n    const void* ptr,\n    size_t len,\n    write_callback_fn fn)\n    : ptr_(ptr), len_(len), fn_(std::move(fn)) {}\n\nRingbufferWriteOperation::RingbufferWriteOperation(\n    const AbstractNopHolder* nopObject,\n    write_callback_fn fn)\n    : nopObject_(nopObject), len_(nopObject_->getSize()), fn_(std::move(fn)) {}\n\ntemplate <int NumRoles, int RoleIdx>\nsize_t RingbufferWriteOperation::handleWrite(\n    RingBufferRole<NumRoles, RoleIdx>& outbox) {\n  ssize_t ret;\n  size_t bytesWrittenNow = 0;\n\n  // Start write transaction. This end of the connection is the only producer\n  // for this ringbuffer, and all writes are done from the reactor thread, so\n  // there cannot be another transaction already going on. 
Fail hard in case.\n  ret = outbox.startTx();\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  if (mode_ == WRITE_LENGTH) {\n    uint32_t length = len_;\n    ret = outbox.template writeInTx</*AllowPartial=*/false>(\n        &length, sizeof(length));\n    if (likely(ret >= 0)) {\n      mode_ = WRITE_PAYLOAD;\n      bytesWrittenNow += ret;\n    } else if (unlikely(ret != -ENODATA)) {\n      TP_THROW_SYSTEM(-ret);\n    }\n  }\n\n  if (mode_ == WRITE_PAYLOAD) {\n    if (nopObject_ != nullptr) {\n      ret = writeNopObject(outbox);\n    } else {\n      ret = outbox.template writeInTx</*AllowPartial=*/true>(\n          reinterpret_cast<const uint8_t*>(ptr_) + bytesWritten_,\n          len_ - bytesWritten_);\n    }\n    if (likely(ret >= 0)) {\n      bytesWritten_ += ret;\n      bytesWrittenNow += ret;\n    } else if (unlikely(ret != -ENODATA)) {\n      TP_THROW_SYSTEM(-ret);\n    }\n  }\n\n  ret = outbox.commitTx();\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  if (completed()) {\n    fn_(Error::kSuccess);\n  }\n\n  return bytesWrittenNow;\n}\n\ntemplate <int NumRoles, int RoleIdx>\nssize_t RingbufferWriteOperation::writeNopObject(\n    RingBufferRole<NumRoles, RoleIdx>& outbox) {\n  TP_THROW_ASSERT_IF(len_ > outbox.getSize());\n\n  ssize_t numBuffers;\n  std::array<typename RingBufferRole<NumRoles, RoleIdx>::Buffer, 2> buffers;\n  std::tie(numBuffers, buffers) =\n      outbox.template accessContiguousInTx</*AllowPartial=*/false>(len_);\n  if (unlikely(numBuffers < 0)) {\n    return numBuffers;\n  }\n\n  NopWriter writer(\n      buffers[0].ptr, buffers[0].len, buffers[1].ptr, buffers[1].len);\n  nop::Status<void> status = nopObject_->write(writer);\n  if (status.error() == nop::ErrorStatus::WriteLimitReached) {\n    return -ENODATA;\n  } else if (status.has_error()) {\n    return -EINVAL;\n  }\n\n  return len_;\n}\n\nvoid RingbufferWriteOperation::handleError(const Error& error) {\n  fn_(error);\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/ringbuffer_role.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <array>\n#include <cstring>\n#include <tuple>\n#include <utility>\n\n#include <tensorpipe/common/ringbuffer.h>\n\nnamespace tensorpipe {\n\n///\n/// Role of a RingBuffer.\n///\n/// Provides methods to read and write data into a ringbuffer.\n///\ntemplate <int NumRoles, int RoleIdx>\nclass RingBufferRole {\n public:\n  static_assert(0 <= RoleIdx && RoleIdx < NumRoles, \"\");\n\n  RingBufferRole() = delete;\n\n  explicit RingBufferRole(RingBuffer<NumRoles>& rb)\n      : header_{rb.getHeader()}, data_{rb.getData()} {\n    TP_THROW_IF_NULLPTR(data_);\n  }\n\n  RingBufferRole(const RingBufferRole&) = delete;\n  RingBufferRole(RingBufferRole&&) = delete;\n\n  RingBufferRole& operator=(const RingBufferRole&) = delete;\n  RingBufferRole& operator=(RingBufferRole&&) = delete;\n\n  ~RingBufferRole() noexcept {\n    TP_THROW_ASSERT_IF(inTx());\n  }\n\n  size_t getSize() const {\n    return header_.kDataPoolByteSize;\n  }\n\n  //\n  // Transaction based API.\n  //\n  // Only one instance of a role can have an active transaction at any time.\n  // *InTx* operations that fail do not cancel transaction.\n  //\n  bool inTx() const noexcept {\n    return inTx_;\n  }\n\n  [[nodiscard]] ssize_t startTx() noexcept {\n    if (unlikely(inTx())) {\n      return -EBUSY;\n    }\n    if (header_.template beginTransaction<RoleIdx>()) {\n      return -EAGAIN;\n    }\n    inTx_ = true;\n    TP_DCHECK_EQ(txSize_, 0);\n    return 0;\n  }\n\n  [[nodiscard]] ssize_t commitTx() noexcept {\n    if (unlikely(!inTx())) {\n      return -EINVAL;\n    }\n    header_.template incMarker<RoleIdx>(txSize_);\n    txSize_ = 0;\n    inTx_ = false;\n    header_.template endTransaction<RoleIdx>();\n    return 0;\n  }\n\n  [[nodiscard]] ssize_t 
cancelTx() noexcept {\n    if (unlikely(!inTx())) {\n      return -EINVAL;\n    }\n    txSize_ = 0;\n    inTx_ = false;\n    header_.template endTransaction<RoleIdx>();\n    return 0;\n  }\n\n  struct Buffer {\n    uint8_t* ptr{nullptr};\n    size_t len{0};\n  };\n\n  // The first item is negative in case of error, otherwise it contains how many\n  // elements of the array are valid (0, 1 or 2). The elements are ptr+len pairs\n  // of contiguous areas of the ringbuffer that, chained together, represent a\n  // slice of the requested size (or less if not enough data is available, and\n  // AllowPartial is set to true).\n  template <bool AllowPartial>\n  [[nodiscard]] std::pair<ssize_t, std::array<Buffer, 2>> accessContiguousInTx(\n      size_t size) noexcept {\n    std::array<Buffer, 2> result;\n\n    if (unlikely(!inTx())) {\n      return {-EINVAL, result};\n    }\n\n    if (unlikely(size == 0)) {\n      return {0, result};\n    }\n\n    const uint64_t tail = header_.template readMarker<RoleIdx>();\n    const uint64_t head =\n        header_.template readMarker<(RoleIdx + 1) % NumRoles>() +\n        (RoleIdx + 1 == NumRoles ? 
header_.kDataPoolByteSize : 0);\n    TP_DCHECK_LE(head - tail, header_.kDataPoolByteSize);\n\n    const size_t avail = head - tail - txSize_;\n    TP_DCHECK_GE(avail, 0);\n\n    if (!AllowPartial && avail < size) {\n      return {-ENODATA, result};\n    }\n\n    if (avail == 0) {\n      return {0, result};\n    }\n\n    size = std::min(size, avail);\n\n    const uint64_t start = (tail + txSize_) & header_.kDataModMask;\n    const uint64_t end = (start + size) & header_.kDataModMask;\n\n    txSize_ += size;\n\n    // end == 0 is the same as end == bufferSize, in which case it doesn't wrap.\n    const bool wrap = (start >= end && end > 0);\n    if (likely(!wrap)) {\n      result[0] = {.ptr = data_ + start, .len = size};\n      return {1, result};\n    } else {\n      result[0] = {\n          .ptr = data_ + start, .len = header_.kDataPoolByteSize - start};\n      result[1] = {.ptr = data_, .len = end};\n      return {2, result};\n    }\n  }\n\n  // Increment our marker without doing anything, i.e., \"skip\" over the data.\n  [[nodiscard]] ssize_t incMarkerInTx(size_t size) {\n    // We could implement this from scratch but we'd rather re-use the logic\n    // from accessContiguous as it's easy to get it wrong.\n    ssize_t ret;\n    std::array<Buffer, 2> buffers;\n    std::tie(ret, buffers) = accessContiguousInTx</*allowPartial=*/false>(size);\n    return ret;\n  }\n\n  // Copy data from the ringbuffer into the provided buffer, up to the given\n  // size (only copy less data if AllowPartial is set to true).\n  template <bool AllowPartial>\n  [[nodiscard]] ssize_t readInTx(void* buffer, const size_t size) noexcept {\n    ssize_t numBuffers;\n    std::array<Buffer, 2> buffers;\n    std::tie(numBuffers, buffers) = accessContiguousInTx<AllowPartial>(size);\n\n    if (unlikely(numBuffers < 0)) {\n      return numBuffers;\n    }\n\n    if (unlikely(numBuffers == 0)) {\n      // Nothing to do.\n      return 0;\n    } else if (likely(numBuffers == 1)) {\n      
std::memcpy(buffer, buffers[0].ptr, buffers[0].len);\n      return buffers[0].len;\n    } else if (likely(numBuffers == 2)) {\n      std::memcpy(buffer, buffers[0].ptr, buffers[0].len);\n      std::memcpy(\n          reinterpret_cast<uint8_t*>(buffer) + buffers[0].len,\n          buffers[1].ptr,\n          buffers[1].len);\n      return buffers[0].len + buffers[1].len;\n    } else {\n      TP_THROW_ASSERT() << \"Bad number of buffers: \" << numBuffers;\n      // Dummy return to make the compiler happy.\n      return -EINVAL;\n    }\n  }\n\n  // Copy data from the provided buffer into the ringbuffer, up to the given\n  // size (only copy less data if AllowPartial is set to true).\n  template <bool AllowPartial>\n  [[nodiscard]] ssize_t writeInTx(\n      const void* buffer,\n      const size_t size) noexcept {\n    ssize_t numBuffers;\n    std::array<Buffer, 2> buffers;\n    std::tie(numBuffers, buffers) = accessContiguousInTx<AllowPartial>(size);\n\n    if (unlikely(numBuffers < 0)) {\n      return numBuffers;\n    }\n\n    if (unlikely(numBuffers == 0)) {\n      // Nothing to do.\n      return 0;\n    } else if (likely(numBuffers == 1)) {\n      std::memcpy(buffers[0].ptr, buffer, buffers[0].len);\n      return buffers[0].len;\n    } else if (likely(numBuffers == 2)) {\n      std::memcpy(buffers[0].ptr, buffer, buffers[0].len);\n      std::memcpy(\n          buffers[1].ptr,\n          reinterpret_cast<const uint8_t*>(buffer) + buffers[0].len,\n          buffers[1].len);\n      return buffers[0].len + buffers[1].len;\n    } else {\n      TP_THROW_ASSERT() << \"Bad number of buffers: \" << numBuffers;\n      // Dummy return to make the compiler happy.\n      return -EINVAL;\n    }\n  }\n\n  //\n  // High-level atomic operations.\n  //\n\n  // Copy data from the ringbuffer into the provided buffer, exactly the given\n  // size. 
Take care of opening and closing the transaction.\n  [[nodiscard]] ssize_t read(void* buffer, const size_t size) noexcept {\n    auto ret = startTx();\n    if (0 > ret) {\n      return ret;\n    }\n\n    ret = readInTx</*AllowPartial=*/false>(buffer, size);\n    if (0 > ret) {\n      auto r = cancelTx();\n      TP_DCHECK_EQ(r, 0);\n      return ret;\n    }\n    TP_DCHECK_EQ(ret, size);\n\n    ret = commitTx();\n    TP_DCHECK_EQ(ret, 0);\n\n    return size;\n  }\n\n  // Copy data from the provided buffer into the ringbuffer, exactly the given\n  // size. Take care of opening and closing the transaction.\n  [[nodiscard]] ssize_t write(const void* buffer, size_t size) noexcept {\n    auto ret = startTx();\n    if (0 > ret) {\n      return ret;\n    }\n\n    ret = writeInTx</*AllowPartial=*/false>(buffer, size);\n    if (0 > ret) {\n      auto r = cancelTx();\n      TP_DCHECK_EQ(r, 0);\n      return ret;\n    }\n    TP_DCHECK_EQ(ret, size);\n\n    ret = commitTx();\n    TP_DCHECK_EQ(ret, 0);\n\n    return size;\n  }\n\n private:\n  RingBufferHeader<NumRoles>& header_;\n  uint8_t* const data_;\n  unsigned txSize_ = 0;\n  bool inTx_{false};\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/shm_ringbuffer.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/common/fd.h>\n#include <tensorpipe/common/ringbuffer.h>\n#include <tensorpipe/common/shm_segment.h>\n\nnamespace tensorpipe {\n\n/// Creates ringbuffer on shared memory.\n///\n/// <minRbByteSize> is the minimum size of the data section of the RingBuffer.\n///\ntemplate <int NumRoles>\nstd::tuple<Error, ShmSegment, ShmSegment, RingBuffer<NumRoles>>\ncreateShmRingBuffer(size_t minRbByteSize) {\n  Error error;\n  ShmSegment headerSegment;\n  RingBufferHeader<NumRoles>* header;\n  std::tie(error, headerSegment, header) =\n      ShmSegment::create<RingBufferHeader<NumRoles>>(minRbByteSize);\n  if (error) {\n    return std::make_tuple(\n        std::move(error), ShmSegment(), ShmSegment(), RingBuffer<NumRoles>());\n  }\n\n  ShmSegment dataSegment;\n  uint8_t* data;\n  std::tie(error, dataSegment, data) =\n      ShmSegment::create<uint8_t[]>(header->kDataPoolByteSize);\n  if (error) {\n    return std::make_tuple(\n        std::move(error), ShmSegment(), ShmSegment(), RingBuffer<NumRoles>());\n  }\n\n  // Note: cannot use implicit construction from initializer list on GCC 5.5:\n  // \"converting to XYZ from initializer list would use explicit constructor\".\n  return std::make_tuple(\n      Error::kSuccess,\n      std::move(headerSegment),\n      std::move(dataSegment),\n      RingBuffer<NumRoles>(header, data));\n}\n\ntemplate <int NumRoles>\nstd::tuple<Error, ShmSegment, ShmSegment, RingBuffer<NumRoles>>\nloadShmRingBuffer(Fd headerFd, Fd dataFd) {\n  Error error;\n  ShmSegment headerSegment;\n  RingBufferHeader<NumRoles>* header;\n  std::tie(error, headerSegment, header) =\n      ShmSegment::load<RingBufferHeader<NumRoles>>(std::move(headerFd));\n  if (error) {\n    return std::make_tuple(\n   
     std::move(error), ShmSegment(), ShmSegment(), RingBuffer<NumRoles>());\n  }\n  constexpr auto kHeaderSize = sizeof(RingBufferHeader<NumRoles>);\n  if (unlikely(kHeaderSize != headerSegment.getSize())) {\n    TP_THROW_SYSTEM(EPERM) << \"Header segment of unexpected size\";\n  }\n\n  ShmSegment dataSegment;\n  uint8_t* data;\n  std::tie(error, dataSegment, data) =\n      ShmSegment::load<uint8_t[]>(std::move(dataFd));\n  if (error) {\n    return std::make_tuple(\n        std::move(error), ShmSegment(), ShmSegment(), RingBuffer<NumRoles>());\n  }\n  if (unlikely(header->kDataPoolByteSize != dataSegment.getSize())) {\n    TP_THROW_SYSTEM(EPERM) << \"Data segment of unexpected size\";\n  }\n\n  return std::make_tuple(\n      Error::kSuccess,\n      std::move(headerSegment),\n      std::move(dataSegment),\n      RingBuffer<NumRoles>(header, data));\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/shm_segment.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/shm_segment.h>\n\n#include <fcntl.h>\n#include <linux/mman.h>\n#include <sched.h>\n#include <sys/mman.h>\n#include <sys/stat.h>\n#include <sys/syscall.h>\n#include <sys/types.h>\n#include <unistd.h>\n\n#include <cstring>\n#include <memory>\n#include <sstream>\n#include <thread>\n#include <tuple>\n\nnamespace tensorpipe {\n\nnamespace {\n\n// Our goal is to obtain a file descriptor that is backed by a region of memory.\n// (We need an fd so we can pass it over a UNIX domain socket). We support two\n// ways of doing so:\n// - The memfd_create syscall, which does exactly what we need. Unfortunately\n//   it was added in a recent-ish kernel and an even more recent glibc version.\n// - As a fallback for older systems, we open a file in the /dev/shm directory,\n//   which we expect to be a mountpoint of tmpfs type. We open it with O_TMPFILE\n//   so it remains unnamed, which won't appear in the directory and can't thus\n//   be opened by other processes and will be automatically cleaned up when we\n//   exit. This method has some issues, as it depends on the availability of\n//   /dev/shm and is capped to the size of that mountpoint (rather than the\n//   total memory of the system), which are especially problematic in Docker.\n// FIXME O_TMPFILE is also not that old, and some users have reported issues due\n// to it. We could add a third method as a further fallback.\n\n// Name to give to the memfds. 
This is just displayed when inspecting the file\n// descriptor in /proc/self/fd to aid debugging, and doesn't have to be unique.\nconstexpr const char* kMemfdName = \"tensorpipe_shm\";\n\nstd::tuple<Error, Fd> createMemfd() {\n  // We don't want to use the ::memfd_create function directly as it's harder to\n  // detect its availability (we'd need to perform a feature check in CMake and\n  // inject the result as a preprocessor flag) and because it would cause us to\n  // link against glibc 2.27. PyTorch aims to support the manylinux2014 platform\n  // (one of the standard platforms defined by Python for PyPI/pip), which has\n  // glibc 2.17. Thus instead we issue the syscall directly, skipping the glibc\n  // wrapper.\n#ifdef SYS_memfd_create\n  // We want to pass the MFD_CLOEXEC flag, but we can't rely on glibc exposing\n  // it, thus we redefine its value if needed.\n#ifndef MFD_CLOEXEC\n// https://github.com/torvalds/linux/blob/master/include/uapi/linux/memfd.h\n#define MFD_CLOEXEC 0x0001U\n#endif\n  int fd = static_cast<int>(::syscall(\n      SYS_memfd_create,\n      static_cast<const char*>(kMemfdName),\n      static_cast<unsigned int>(MFD_CLOEXEC)));\n  if (fd < 0) {\n    return std::make_tuple(\n        TP_CREATE_ERROR(SystemError, \"memfd_create\", errno), Fd());\n  }\n  return std::make_tuple(Error::kSuccess, Fd(fd));\n#else // SYS_memfd_create\n  return std::make_tuple(\n      TP_CREATE_ERROR(SystemError, \"memfd_create\", ENOSYS), Fd());\n#endif // SYS_memfd_create\n}\n\n// Default base path for all segments created.\nconstexpr const char* kBasePath = \"/dev/shm\";\n\nstd::tuple<Error, Fd> openTmpfileInDevShm() {\n  // Some users are compiling on old pre-3.11 kernels. 
We'd like our backends to\n  // only depend on runtime capabilities, and not on compile-time ones, hence we\n  // \"polyfill\" the flag so the build will pass and we'll get a runtime error.\n#ifndef O_TMPFILE\n// https://github.com/torvalds/linux/blob/master/include/uapi/asm-generic/fcntl.h\n#define O_TMPFILE (020000000 | 00200000)\n#endif\n  int flags = O_TMPFILE | O_EXCL | O_RDWR | O_CLOEXEC;\n  int fd = ::open(kBasePath, flags, 0);\n  if (fd < 0) {\n    return std::make_tuple(TP_CREATE_ERROR(SystemError, \"open\", errno), Fd());\n  }\n\n  return std::make_tuple(Error::kSuccess, Fd(fd));\n}\n\nstd::tuple<Error, Fd> createShmFd() {\n  Error error;\n  Fd fd;\n  std::tie(error, fd) = createMemfd();\n  if (error && error.isOfType<SystemError>() &&\n      error.castToType<SystemError>()->errorCode() == ENOSYS) {\n    std::tie(error, fd) = openTmpfileInDevShm();\n  }\n  return std::make_tuple(std::move(error), std::move(fd));\n}\n\nstd::tuple<Error, MmappedPtr> mmapShmFd(int fd, size_t byteSize) {\n  int flags = MAP_SHARED;\n  int prot = PROT_READ | PROT_WRITE;\n  return MmappedPtr::create(byteSize, prot, flags, fd);\n}\n\n} // namespace\n\nShmSegment::ShmSegment(Fd fd, MmappedPtr ptr)\n    : fd_(std::move(fd)), ptr_(std::move(ptr)) {}\n\nstd::tuple<Error, ShmSegment> ShmSegment::alloc(size_t byteSize) {\n  Error error;\n  Fd fd;\n  std::tie(error, fd) = createShmFd();\n  if (error) {\n    return std::make_tuple(std::move(error), ShmSegment());\n  }\n\n  // grow size to contain byte_size bytes.\n  off_t len = static_cast<off_t>(byteSize);\n  int ret = ::fallocate(fd.fd(), 0, 0, len);\n  if (ret < 0) {\n    return std::make_tuple(\n        TP_CREATE_ERROR(SystemError, \"fallocate\", errno), ShmSegment());\n  }\n\n  MmappedPtr ptr;\n  std::tie(error, ptr) = mmapShmFd(fd.fd(), byteSize);\n  if (error) {\n    return std::make_tuple(std::move(error), ShmSegment());\n  }\n\n  return std::make_tuple(\n      Error::kSuccess, ShmSegment(std::move(fd), 
std::move(ptr)));\n}\n\nstd::tuple<Error, ShmSegment> ShmSegment::access(Fd fd) {\n  // Load whole file. Use fstat to obtain size.\n  struct stat sb;\n  int ret = ::fstat(fd.fd(), &sb);\n  if (ret < 0) {\n    return std::make_tuple(\n        TP_CREATE_ERROR(SystemError, \"fstat\", errno), ShmSegment());\n  }\n  size_t byteSize = static_cast<size_t>(sb.st_size);\n\n  Error error;\n  MmappedPtr ptr;\n  std::tie(error, ptr) = mmapShmFd(fd.fd(), byteSize);\n  if (error) {\n    return std::make_tuple(std::move(error), ShmSegment());\n  }\n\n  return std::make_tuple(\n      Error::kSuccess, ShmSegment(std::move(fd), std::move(ptr)));\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/shm_segment.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <fcntl.h>\n#include <cstring>\n#include <memory>\n#include <sstream>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/fd.h>\n#include <tensorpipe/common/memory.h>\n#include <tensorpipe/common/optional.h>\n\n//\n// A C++17 version of shared memory segments handler inspired on boost\n// interprocess.\n//\n\nnamespace tensorpipe {\n\nclass ShmSegment {\n  ShmSegment(Fd fd, MmappedPtr ptr);\n\n public:\n  ShmSegment() = default;\n\n  static std::tuple<Error, ShmSegment> alloc(size_t byteSize);\n\n  static std::tuple<Error, ShmSegment> access(Fd fd);\n\n  /// Allocate shared memory to contain an object of type T and construct it.\n  ///\n  /// The Segment object owns the memory and frees it when destructed.\n  /// The raw pointer to the object provides a view into the Segment but doesn't\n  /// own it and may thus become invalid if the Segment isn't kept alive.\n  template <\n      typename T,\n      typename... Args,\n      std::enable_if_t<!std::is_array<T>::value, int> = 0>\n  static std::tuple<Error, ShmSegment, T*> create(Args&&... args) {\n    static_assert(\n        std::is_trivially_copyable<T>::value,\n        \"Shared memory segments are restricted to only store objects that \"\n        \"are trivially copyable (i.e. no pointers and no heap allocation\");\n\n    const auto byteSize = sizeof(T);\n    Error error;\n    ShmSegment segment;\n    std::tie(error, segment) = ShmSegment::alloc(byteSize);\n    if (error) {\n      return std::make_tuple(std::move(error), ShmSegment(), nullptr);\n    }\n    TP_DCHECK_EQ(segment.getSize(), byteSize);\n\n    // Initialize in place. 
Forward T's constructor arguments.\n    T* ptr = new (segment.getPtr()) T(std::forward<Args>(args)...);\n    TP_THROW_SYSTEM_IF(ptr != segment.getPtr(), EPERM)\n        << \"new's address cannot be different from segment.getPtr() \"\n        << \"address. Some aligment assumption was incorrect\";\n\n    return std::make_tuple(Error::kSuccess, std::move(segment), ptr);\n  }\n\n  /// One-dimensional array version of create<T, ...Args>.\n  // XXX: Fuse all versions of create.\n  template <\n      typename T,\n      std::enable_if_t<std::is_array<T>::value, int> = 0,\n      typename TScalar = typename std::remove_all_extents<T>::type>\n  static std::tuple<Error, ShmSegment, TScalar*> create(size_t numElements) {\n    static_assert(\n        std::is_same<TScalar[], T>::value,\n        \"Only one-dimensional unbounded arrays are supported\");\n    static_assert(\n        std::is_trivially_copyable<TScalar>::value,\n        \"Shared memory segments are restricted to only store objects that \"\n        \"are trivially copyable (i.e. no pointers and no heap allocation\");\n\n    size_t byteSize = sizeof(TScalar) * numElements;\n    Error error;\n    ShmSegment segment;\n    std::tie(error, segment) = ShmSegment::alloc(byteSize);\n    if (error) {\n      return std::make_tuple(std::move(error), ShmSegment(), nullptr);\n    }\n    TP_DCHECK_EQ(segment.getSize(), byteSize);\n\n    // Initialize in place.\n    TScalar* ptr = new (segment.getPtr()) TScalar[numElements]();\n    TP_THROW_SYSTEM_IF(ptr != segment.getPtr(), EPERM)\n        << \"new's address cannot be different from segment.getPtr() \"\n        << \"address. 
Some aligment assumption was incorrect\";\n\n    return std::make_tuple(Error::kSuccess, std::move(segment), ptr);\n  }\n\n  /// Load an existing shared memory region that already holds an object of type\n  /// T, where T is NOT an array type.\n  template <typename T, std::enable_if_t<!std::is_array<T>::value, int> = 0>\n  static std::tuple<Error, ShmSegment, T*> load(Fd fd) {\n    static_assert(\n        std::is_trivially_copyable<T>::value,\n        \"Shared memory segments are restricted to only store objects that \"\n        \"are trivially copyable (i.e. no pointers and no heap allocation\");\n\n    Error error;\n    ShmSegment segment;\n    std::tie(error, segment) = ShmSegment::access(std::move(fd));\n    if (error) {\n      return std::make_tuple(std::move(error), ShmSegment(), nullptr);\n    }\n    const size_t size = segment.getSize();\n    // XXX: Do some checking other than the size that we are loading\n    // the right type.\n    TP_THROW_SYSTEM_IF(size != sizeof(T), EPERM)\n        << \"Shared memory file has unexpected size. \"\n        << \"Got: \" << size << \" bytes, expected: \" << sizeof(T) << \". 
\"\n        << \"If there is a race between creation and loading of segments, \"\n        << \"consider linking segment after it has been fully initialized.\";\n    auto ptr = static_cast<T*>(segment.getPtr());\n\n    return std::make_tuple(Error::kSuccess, std::move(segment), ptr);\n  }\n\n  /// Load an existing shared memory region that already holds an object of type\n  /// T, where T is an array type.\n  template <\n      typename T,\n      std::enable_if_t<std::is_array<T>::value, int> = 0,\n      typename TScalar = typename std::remove_all_extents<T>::type>\n  static std::tuple<Error, ShmSegment, TScalar*> load(Fd fd) {\n    static_assert(\n        std::is_same<TScalar[], T>::value,\n        \"Only one-dimensional unbounded arrays are supported\");\n    static_assert(\n        std::is_trivially_copyable<TScalar>::value,\n        \"Shared memory segments are restricted to only store objects that \"\n        \"are trivially copyable (i.e. no pointers and no heap allocation\");\n\n    Error error;\n    ShmSegment segment;\n    std::tie(error, segment) = ShmSegment::access(std::move(fd));\n    if (error) {\n      return std::make_tuple(std::move(error), ShmSegment(), nullptr);\n    }\n    auto ptr = static_cast<TScalar*>(segment.getPtr());\n\n    return std::make_tuple(Error::kSuccess, std::move(segment), ptr);\n  }\n\n  int getFd() const {\n    return fd_.fd();\n  }\n\n  void* getPtr() {\n    return ptr_.ptr();\n  }\n\n  const void* getPtr() const {\n    return ptr_.ptr();\n  }\n\n  size_t getSize() const {\n    return ptr_.getLength();\n  }\n\n private:\n  // The file descriptor of the shared memory file.\n  Fd fd_;\n\n  // Base pointer of mmmap'ed shared memory segment.\n  MmappedPtr ptr_;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/socket.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/socket.h>\n\n#include <fcntl.h>\n#include <sys/un.h>\n#include <unistd.h>\n\n#include <cstring>\n\n#include <tensorpipe/common/defs.h>\n\n#ifndef SOCK_NONBLOCK\n#define SOCK_NONBLOCK 0\n#endif // SOCK_NONBLOCK\n\nnamespace tensorpipe {\n\nstd::tuple<Error, Socket> Socket::createForFamily(sa_family_t aiFamily) {\n  auto rv = socket(aiFamily, SOCK_STREAM | SOCK_NONBLOCK, 0);\n  if (rv == -1) {\n    return std::make_tuple(\n        TP_CREATE_ERROR(SystemError, \"socket\", errno), Socket());\n  }\n  Socket sock(rv);\n#ifndef SOCK_NONBLOCK\n  // The SOCK_NONBLOCK option of socket() is Linux-only. On OSX, we need to\n  // manually set the socket to non-blocking after its creation.\n  auto err = sock->block(false);\n  if (err) {\n    return std::make_tuple(err, Socket());\n  }\n#endif // SOCK_NONBLOCK\n  return std::make_tuple(Error::kSuccess, std::move(sock));\n}\n\nError Socket::block(bool on) {\n  int rv;\n  rv = fcntl(fd_, F_GETFL);\n  if (rv == -1) {\n    return TP_CREATE_ERROR(SystemError, \"fcntl\", errno);\n  }\n  if (!on) {\n    // Set O_NONBLOCK\n    rv |= O_NONBLOCK;\n  } else {\n    // Clear O_NONBLOCK\n    rv &= ~O_NONBLOCK;\n  }\n  rv = fcntl(fd_, F_SETFL, rv);\n  if (rv == -1) {\n    return TP_CREATE_ERROR(SystemError, \"fcntl\", errno);\n  }\n  return Error::kSuccess;\n}\n\nError Socket::reuseAddr(bool on) {\n  int onInt = on ? 
1 : 0;\n  auto rv = setsockopt(fd_, SOL_SOCKET, SO_REUSEADDR, &onInt, sizeof(onInt));\n  if (rv == -1) {\n    return TP_CREATE_ERROR(SystemError, \"setsockopt\", errno);\n  }\n  return Error::kSuccess;\n}\n\nError Socket::bind(const Sockaddr& addr) {\n  auto rv = ::bind(fd_, addr.addr(), addr.addrlen());\n  if (rv == -1) {\n    return TP_CREATE_ERROR(SystemError, \"bind\", errno);\n  }\n  return Error::kSuccess;\n}\n\nError Socket::listen(int backlog) {\n  auto rv = ::listen(fd_, backlog);\n  if (rv == -1) {\n    return TP_CREATE_ERROR(SystemError, \"listen\", errno);\n  }\n  return Error::kSuccess;\n}\n\nstd::tuple<Error, Socket> Socket::accept() {\n  struct sockaddr_storage addr;\n  socklen_t addrlen = sizeof(addr);\n  int rv = -1;\n  for (;;) {\n    rv = ::accept(fd_, (struct sockaddr*)&addr, &addrlen);\n    if (rv == -1) {\n      if (errno == EINTR) {\n        continue;\n      }\n      return std::make_tuple(\n          TP_CREATE_ERROR(SystemError, \"accept\", errno), Socket());\n    }\n    break;\n  }\n  return std::make_tuple(Error::kSuccess, Socket(rv));\n}\n\nError Socket::connect(const Sockaddr& addr) {\n  for (;;) {\n    auto rv = ::connect(fd_, addr.addr(), addr.addrlen());\n    if (rv == -1) {\n      if (errno == EINTR) {\n        continue;\n      }\n      if (errno != EINPROGRESS) {\n        return TP_CREATE_ERROR(SystemError, \"connect\", errno);\n      }\n    }\n    break;\n  }\n  return Error::kSuccess;\n}\n\nstd::tuple<Error, struct sockaddr_storage, socklen_t> Socket::getSockName()\n    const {\n  struct sockaddr_storage addr;\n  socklen_t addrlen = sizeof(addr);\n  int rv = ::getsockname(fd_, reinterpret_cast<sockaddr*>(&addr), &addrlen);\n  if (rv < 0) {\n    return std::make_tuple(\n        TP_CREATE_ERROR(SystemError, \"getsockname\", errno), addr, addrlen);\n  }\n  return std::make_tuple(Error::kSuccess, addr, addrlen);\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/socket.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sys/socket.h>\n\n#include <chrono>\n#include <cstring>\n#include <memory>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/common/fd.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\n\nnamespace {\n\nvoid saveOneFdToArray(int& dst, const int& src) {\n  dst = src;\n}\n\nvoid saveOneFdToArray(int& dst, const Fd& src) {\n  dst = src.fd();\n}\n\ntemplate <size_t... Idxs, typename... Fds>\nvoid saveFdsToArray(\n    int* array,\n    std::index_sequence<Idxs...> /*unused*/,\n    const Fds&... fds) {\n  // This is a trick to do pack expansion of the function call.\n  auto dummy = {(saveOneFdToArray(array[Idxs], fds), 0)...};\n}\n\nvoid loadOneFdFromArray(int& src, int& dst) {\n  dst = src;\n}\n\nvoid loadOneFdFromArray(int& src, Fd& dst) {\n  dst = Fd(src);\n}\n\ntemplate <size_t... Idxs, typename... Fds>\nvoid loadFdsFromArray(\n    int* array,\n    std::index_sequence<Idxs...> /*unused*/,\n    Fds&... fds) {\n  // This is a trick to do pack expansion of the function call.\n  auto dummy = {(loadOneFdFromArray(array[Idxs], fds), 0)...};\n}\n\n} // namespace\n\ntemplate <typename T, typename... Fds>\n[[nodiscard]] Error sendToSocket(\n    int socketFd,\n    const T& t1,\n    const T& t2,\n    const Fds&... 
fds) {\n  using TPayload = int;\n\n  // Build message.\n  struct msghdr msg;\n  msg.msg_name = nullptr;\n  msg.msg_namelen = 0;\n  msg.msg_flags = 0;\n\n  // Build iov to write Ts.\n  std::array<T, 2> tbuf = {t1, t2};\n  struct iovec iov;\n  iov.iov_base = tbuf.data();\n  iov.iov_len = sizeof(tbuf);\n  msg.msg_iov = &iov;\n  msg.msg_iovlen = sizeof(iov) / sizeof(iovec);\n\n  // Build control message.\n  std::array<uint8_t, CMSG_SPACE(sizeof(TPayload) * sizeof...(Fds))> buf;\n  msg.msg_control = buf.data();\n  msg.msg_controllen = buf.size();\n\n  struct cmsghdr* cmsg;\n  cmsg = CMSG_FIRSTHDR(&msg);\n  cmsg->cmsg_level = SOL_SOCKET;\n  cmsg->cmsg_type = SCM_RIGHTS;\n  cmsg->cmsg_len = CMSG_LEN(sizeof(TPayload) * sizeof...(Fds));\n  auto payload = reinterpret_cast<TPayload*>(CMSG_DATA(cmsg));\n  saveFdsToArray(payload, std::index_sequence_for<Fds...>{}, fds...);\n\n  // Send message.\n  for (;;) {\n    auto rv = ::sendmsg(socketFd, &msg, 0);\n    if (rv == -1) {\n      if (errno == EINTR) {\n        continue;\n      }\n      return TP_CREATE_ERROR(SystemError, \"sendmsg\", errno);\n    }\n    if (rv != iov.iov_len) {\n      return TP_CREATE_ERROR(ShortWriteError, iov.iov_len, rv);\n    }\n    break;\n  }\n\n  return Error::kSuccess;\n}\n\ntemplate <typename... Fds>\n[[nodiscard]] Error sendFdsToSocket(int socketFd, const Fds&... fds) {\n  char dummy = 0;\n  return sendToSocket(socketFd, dummy, dummy, fds...);\n}\n\ntemplate <typename T, typename... Fds>\n[[nodiscard]] Error recvFromSocket(int socketFd, T& t1, T& t2, Fds&... 
fds) {\n  using TPayload = int;\n\n  // Build message.\n  struct msghdr msg;\n  msg.msg_name = nullptr;\n  msg.msg_namelen = 0;\n  msg.msg_flags = 0;\n\n  // Build iov to read Ts.\n  std::array<T, 2> tbuf;\n  struct iovec iov;\n  iov.iov_base = tbuf.data();\n  iov.iov_len = sizeof(tbuf);\n  msg.msg_iov = &iov;\n  msg.msg_iovlen = sizeof(iov) / sizeof(iovec);\n\n  // Build control message.\n  std::array<uint8_t, CMSG_SPACE(sizeof(TPayload) * sizeof...(Fds))> buf;\n  msg.msg_control = buf.data();\n  msg.msg_controllen = buf.size();\n\n  // Receive message.\n  for (;;) {\n    auto rv = ::recvmsg(socketFd, &msg, 0);\n    if (rv == -1) {\n      if (errno == EINTR) {\n        continue;\n      }\n      return TP_CREATE_ERROR(SystemError, \"recvmsg\", errno);\n    }\n    if (rv != iov.iov_len) {\n      return TP_CREATE_ERROR(ShortReadError, iov.iov_len, rv);\n    }\n    break;\n  }\n\n  t1 = tbuf[0];\n  t2 = tbuf[1];\n\n  // Read control message.\n  struct cmsghdr* cmsg;\n  cmsg = CMSG_FIRSTHDR(&msg);\n  TP_DCHECK_NE(cmsg, static_cast<void*>(nullptr));\n  TP_DCHECK_EQ(cmsg->cmsg_level, SOL_SOCKET);\n  TP_DCHECK_EQ(cmsg->cmsg_type, SCM_RIGHTS);\n  TP_DCHECK_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(TPayload) * sizeof...(Fds)));\n  auto payload = reinterpret_cast<TPayload*>(CMSG_DATA(cmsg));\n  loadFdsFromArray(payload, std::index_sequence_for<Fds...>{}, fds...);\n\n  return Error::kSuccess;\n}\n\ntemplate <typename... Fds>\n[[nodiscard]] Error recvFdsFromSocket(int socketFd, Fds&... 
fds) {\n  char dummy = 0;\n  return recvFromSocket(socketFd, dummy, dummy, fds...);\n}\n\nclass Sockaddr {\n public:\n  virtual const struct sockaddr* addr() const = 0;\n\n  virtual socklen_t addrlen() const = 0;\n\n  virtual ~Sockaddr() = default;\n};\n\nclass Socket final : public Fd {\n public:\n  [[nodiscard]] static std::tuple<Error, Socket> createForFamily(\n      sa_family_t aiFamily);\n\n  Socket() = default;\n\n  explicit Socket(int fd) : Fd(fd) {}\n\n  // Configure if the socket is blocking or not.\n  [[nodiscard]] Error block(bool on);\n\n  // Set (or unset) the SO_REUSEADDR option on the socket.\n  [[nodiscard]] Error reuseAddr(bool on);\n\n  // Bind socket to address.\n  [[nodiscard]] Error bind(const Sockaddr& addr);\n\n  // Listen on socket.\n  [[nodiscard]] Error listen(int backlog);\n\n  // Accept new socket connecting to listening socket.\n  [[nodiscard]] std::tuple<Error, Socket> accept();\n\n  // Connect to address.\n  [[nodiscard]] Error connect(const Sockaddr& addr);\n\n  [[nodiscard]] std::tuple<Error, struct sockaddr_storage, socklen_t>\n  getSockName() const;\n\n  // Send file descriptor.\n  template <typename... Fds>\n  [[nodiscard]] Error sendFds(const Fds&... fds) {\n    return sendFdsToSocket(fd_, fds...);\n  }\n\n  // Receive file descriptor.\n  template <typename... Fds>\n  [[nodiscard]] Error recvFds(Fds&... fds) {\n    return recvFdsFromSocket(fd_, fds...);\n  }\n\n  // Send object and file descriptor.\n  template <\n      typename T,\n      typename... Fds,\n      typename std::enable_if<std::is_trivially_copyable<T>::value, bool>::\n          type = false>\n  [[nodiscard]] Error sendPayloadAndFds(\n      const T& t1,\n      const T& t2,\n      const Fds&... fds) {\n    return sendToSocket(fd_, t1, t2, fds...);\n  }\n\n  // Receive object and file descriptor.\n  template <\n      typename T,\n      typename... 
Fds,\n      typename std::enable_if<std::is_trivially_copyable<T>::value, bool>::\n          type = false>\n  [[nodiscard]] Error recvPayloadAndFds(T& t1, T& t2, Fds&... fds) {\n    return recvFromSocket(fd_, t1, t2, fds...);\n  }\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/state_machine.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstdint>\n#include <deque>\n#include <utility>\n\nnamespace tensorpipe {\n\ntemplate <typename TSubject, typename TOp>\nclass OpsStateMachine {\n public:\n  class Iter {\n   public:\n    TOp& operator*() const {\n      return *opPtr_;\n    }\n\n    TOp* operator->() const {\n      return opPtr_;\n    }\n\n   private:\n    explicit Iter(TOp* opPtr) : opPtr_(opPtr) {}\n\n    TOp* opPtr_{nullptr};\n\n    friend OpsStateMachine;\n  };\n\n  using Transitioner = void (TSubject::*)(Iter, typename TOp::State);\n\n  OpsStateMachine(TSubject& subject, Transitioner transitioner)\n      : subject_(subject), transitioner_(transitioner) {}\n\n  template <typename... TArgs>\n  Iter emplaceBack(uint64_t sequenceNumber, TArgs&&... args) {\n    ops_.emplace_back(std::forward<TArgs>(args)...);\n    TOp& op = ops_.back();\n    op.sequenceNumber = sequenceNumber;\n    return Iter(&op);\n  }\n\n  void advanceOperation(Iter initialOpIter) {\n    // Advancing one operation may unblock later ones that could have progressed\n    // but were prevented from overtaking. 
Thus each time an operation manages\n    // to advance we'll try to also advance the one after.\n    for (int64_t sequenceNumber = initialOpIter->sequenceNumber;;\n         ++sequenceNumber) {\n      TOp* opPtr = findOperation(sequenceNumber);\n      if (opPtr == nullptr || opPtr->state == TOp::FINISHED ||\n          !advanceOneOperation(*opPtr)) {\n        break;\n      }\n    }\n  }\n\n  void advanceAllOperations() {\n    // We cannot just iterate over the operations here as advanceOneOperation\n    // could potentially erase some of them, thus invalidating references and/or\n    // iterators.\n    if (ops_.empty()) {\n      return;\n    }\n    for (int64_t sequenceNumber = ops_.front().sequenceNumber;;\n         ++sequenceNumber) {\n      TOp* opPtr = findOperation(sequenceNumber);\n      if (opPtr == nullptr) {\n        break;\n      }\n      advanceOneOperation(*opPtr);\n    }\n  }\n\n  void attemptTransition(\n      Iter opIter,\n      typename TOp::State from,\n      typename TOp::State to,\n      bool cond,\n      std::initializer_list<void (TSubject::*)(Iter)> actions) {\n    if (opIter->state == from && cond) {\n      for (const auto& action : actions) {\n        (subject_.*action)(opIter);\n      }\n      opIter->state = to;\n    }\n  }\n\n private:\n  TOp* findOperation(int64_t sequenceNumber) {\n    if (ops_.empty()) {\n      return nullptr;\n    }\n    int64_t offset = sequenceNumber - ops_.front().sequenceNumber;\n    if (offset < 0 || offset >= ops_.size()) {\n      return nullptr;\n    }\n    TOp& op = ops_[offset];\n    TP_DCHECK_EQ(op.sequenceNumber, sequenceNumber);\n    return &op;\n  }\n\n  bool advanceOneOperation(TOp& op) {\n    // Due to the check in attemptTransition, each time that an operation\n    // advances its state we must check whether this unblocks some later\n    // operations that could progress but weren't allowed to overtake. 
In order\n    // to detect whether this operation is advancing we store its state at the\n    // beginning and then compare it with the state at the end.\n    typename TOp::State initialState = op.state;\n\n    // The operations must advance in order: later operations cannot \"overtake\"\n    // earlier ones. Thus if this operation would reach a more advanced state\n    // than previous operation we won't perform the transition.\n    TOp* prevOpPtr = findOperation(op.sequenceNumber - 1);\n    typename TOp::State prevOpState =\n        prevOpPtr != nullptr ? prevOpPtr->state : TOp::FINISHED;\n\n    (subject_.*transitioner_)(Iter(&op), prevOpState);\n\n    // Compute return value now in case we next delete the operation.\n    bool hasAdvanced = op.state != initialState;\n\n    if (op.state == TOp::FINISHED) {\n      // We can't remove the op if it's \"in the middle\". And, therefore, once we\n      // remove the op at the front, we must check if other ops now also get\n      // \"unblocked\". In other words, we always remove as much as we can from\n      // the front.\n      while (!ops_.empty() && ops_.front().state == TOp::FINISHED) {\n        ops_.pop_front();\n      }\n    }\n\n    return hasAdvanced;\n  }\n\n  TSubject& subject_;\n  const Transitioner transitioner_;\n  std::deque<TOp> ops_;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/stream_read_write_ops.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <array>\n#include <functional>\n#include <memory>\n#include <tuple>\n#include <utility>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\n\n// The read operation captures all state associated with reading a\n// fixed length chunk of data from the underlying connection. All\n// reads are required to include a word-sized header containing the\n// number of bytes in the operation. This makes it possible for the\n// read side of the connection to either 1) not know how many bytes\n// to expected, and dynamically allocate, or 2) know how many bytes\n// to expect, and preallocate the destination memory.\nclass StreamReadOperation {\n  enum Mode {\n    READ_LENGTH,\n    READ_PAYLOAD,\n    COMPLETE,\n  };\n\n public:\n  using read_callback_fn =\n      std::function<void(const Error& error, const void* ptr, size_t len)>;\n\n  explicit inline StreamReadOperation(read_callback_fn fn);\n\n  inline StreamReadOperation(void* ptr, size_t length, read_callback_fn fn);\n\n  // Called when a buffer is needed to read data from stream.\n  inline void allocFromLoop(char** base, size_t* len);\n\n  // Called when data has been read from stream.\n  inline void readFromLoop(size_t nread);\n\n  // Returns if this read operation is complete.\n  inline bool completeFromLoop() const;\n\n  // Invoke user callback.\n  inline void callbackFromLoop(const Error& error);\n\n private:\n  Mode mode_{READ_LENGTH};\n  char* ptr_{nullptr};\n\n  // Number of bytes as specified by the user (if applicable).\n  optional<size_t> givenLength_;\n\n  // Number of bytes to expect as read from the connection.\n  size_t readLength_{0};\n\n  // Number of 
bytes read from the connection.\n  // This is reset to 0 when we advance from READ_LENGTH to READ_PAYLOAD.\n  size_t bytesRead_{0};\n\n  // Holds temporary allocation if no length was specified.\n  std::unique_ptr<char[]> buffer_{nullptr};\n\n  // User callback.\n  read_callback_fn fn_;\n};\n\nStreamReadOperation::StreamReadOperation(read_callback_fn fn)\n    : fn_(std::move(fn)) {}\n\nStreamReadOperation::StreamReadOperation(\n    void* ptr,\n    size_t length,\n    read_callback_fn fn)\n    : ptr_(static_cast<char*>(ptr)), givenLength_(length), fn_(std::move(fn)) {}\n\nvoid StreamReadOperation::allocFromLoop(char** base, size_t* len) {\n  if (mode_ == READ_LENGTH) {\n    TP_DCHECK_LT(bytesRead_, sizeof(readLength_));\n    *base = reinterpret_cast<char*>(&readLength_) + bytesRead_;\n    *len = sizeof(readLength_) - bytesRead_;\n  } else if (mode_ == READ_PAYLOAD) {\n    TP_DCHECK_LT(bytesRead_, readLength_);\n    TP_DCHECK(ptr_ != nullptr);\n    *base = ptr_ + bytesRead_;\n    *len = readLength_ - bytesRead_;\n  } else {\n    TP_THROW_ASSERT() << \"invalid mode \" << mode_;\n  }\n}\n\nvoid StreamReadOperation::readFromLoop(size_t nread) {\n  bytesRead_ += nread;\n  if (mode_ == READ_LENGTH) {\n    TP_DCHECK_LE(bytesRead_, sizeof(readLength_));\n    if (bytesRead_ == sizeof(readLength_)) {\n      if (givenLength_.has_value()) {\n        TP_DCHECK(ptr_ != nullptr || givenLength_.value() == 0);\n        TP_DCHECK_EQ(readLength_, givenLength_.value());\n      } else {\n        TP_DCHECK(ptr_ == nullptr);\n        buffer_ = std::make_unique<char[]>(readLength_);\n        ptr_ = buffer_.get();\n      }\n      if (readLength_ == 0) {\n        mode_ = COMPLETE;\n      } else {\n        mode_ = READ_PAYLOAD;\n      }\n      bytesRead_ = 0;\n    }\n  } else if (mode_ == READ_PAYLOAD) {\n    TP_DCHECK_LE(bytesRead_, readLength_);\n    if (bytesRead_ == readLength_) {\n      mode_ = COMPLETE;\n    }\n  } else {\n    TP_THROW_ASSERT() << \"invalid mode \" << mode_;\n  
}\n}\n\nbool StreamReadOperation::completeFromLoop() const {\n  return mode_ == COMPLETE;\n}\n\nvoid StreamReadOperation::callbackFromLoop(const Error& error) {\n  fn_(error, ptr_, readLength_);\n}\n\n// The write operation captures all state associated with writing a\n// fixed length chunk of data from the underlying connection. The\n// write includes a word-sized header containing the length of the\n// write. This header is a member field on this class and therefore\n// the instance must be kept alive and the reference to the instance\n// must remain valid until the write callback has been called.\nclass StreamWriteOperation {\n public:\n  using write_callback_fn = std::function<void(const Error& error)>;\n\n  inline StreamWriteOperation(\n      const void* ptr,\n      size_t length,\n      write_callback_fn fn);\n\n  struct Buf {\n    char* base;\n    size_t len;\n  };\n\n  inline std::tuple<Buf*, size_t> getBufs();\n\n  // Invoke user callback.\n  inline void callbackFromLoop(const Error& error);\n\n private:\n  const char* ptr_;\n  const size_t length_;\n\n  // Buffers (structs with pointers and lengths) to write to stream.\n  std::array<Buf, 2> bufs_;\n\n  // User callback.\n  write_callback_fn fn_;\n};\n\nStreamWriteOperation::StreamWriteOperation(\n    const void* ptr,\n    size_t length,\n    write_callback_fn fn)\n    : ptr_(static_cast<const char*>(ptr)), length_(length), fn_(std::move(fn)) {\n  bufs_[0].base = const_cast<char*>(reinterpret_cast<const char*>(&length_));\n  bufs_[0].len = sizeof(length_);\n  bufs_[1].base = const_cast<char*>(ptr_);\n  bufs_[1].len = length_;\n}\n\nstd::tuple<StreamWriteOperation::Buf*, size_t> StreamWriteOperation::getBufs() {\n  size_t numBuffers = length_ == 0 ? 1 : 2;\n  return std::make_tuple(bufs_.data(), numBuffers);\n}\n\nvoid StreamWriteOperation::callbackFromLoop(const Error& error) {\n  fn_(error);\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/strings.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sstream>\n#include <string>\n#include <vector>\n\nnamespace tensorpipe {\n\ninline std::string joinStrs(const std::vector<std::string>& strs) {\n  if (strs.empty()) {\n    return \"\";\n  }\n  std::ostringstream oss;\n  oss << strs[0];\n  for (size_t idx = 1; idx < strs.size(); idx++) {\n    oss << \", \" << strs[idx];\n  }\n  return oss.str();\n}\n\ntemplate <typename T>\nstd::string formatMatrix(const std::vector<std::vector<T>>& matrix) {\n  std::ostringstream oss;\n  oss << \"{\";\n  for (size_t rowIdx = 0; rowIdx < matrix.size(); rowIdx++) {\n    if (rowIdx > 0) {\n      oss << \", \";\n    }\n    oss << \"{\";\n    for (size_t colIdx = 0; colIdx < matrix[rowIdx].size(); colIdx++) {\n      if (colIdx > 0) {\n        oss << \", \";\n      }\n      oss << matrix[rowIdx][colIdx];\n    }\n    oss << \"}\";\n  }\n  oss << \"}\";\n  return oss.str();\n}\n\n// Since text manipulation is hard, let's use this to double-check our results.\ninline bool isValidUuid(const std::string& uuid) {\n  // Check it's in this format:\n  // aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee\n  // |0   |5   |10  |15  |20  |25  |30  |35\n  if (uuid.size() != 36) {\n    return false;\n  }\n  for (int i = 0; i < uuid.size(); i++) {\n    if (i == 8 || i == 13 || i == 18 || i == 23) {\n      if (uuid[i] != '-') {\n        return false;\n      }\n    } else {\n      if (!((uuid[i] >= '0' && uuid[i] <= '9') ||\n            (uuid[i] >= 'a' && uuid[i] <= 'f'))) {\n        return false;\n      }\n    }\n  }\n  return true;\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/system.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/system.h>\n\n#ifdef __linux__\n#include <linux/capability.h>\n#include <pthread.h>\n#include <sys/stat.h>\n#include <sys/types.h>\n#include <unistd.h>\n#endif\n\n#ifdef __APPLE__\n#include <IOKit/IOKitLib.h>\n#endif\n\n#include <array>\n#include <cstring>\n#include <fstream>\n#include <iomanip>\n#include <sstream>\n#include <stdexcept>\n#include <system_error>\n#include <thread>\n\n#ifdef __linux__\n\n// This is a libc wrapper for the Linux syscall.\n// I'm not sure why we need to declare it ourselves, but that's what libcap\n// does too, and I couldn't find any libc header in which it's declared.\n// Direct use of the syscall is strongly discouraged, in favor of libcap (which\n// has a more friendly API and better backwards-compatibility). 
However we\n// really don't want to add a dependency, and moreover libcap introduces an\n// artificial limitation that only allows us to query the capabilities that were\n// defined by the kernel headers when libcap was built, meaning we might miss\n// some (new) capabilities if the kernel was updated in the meantime.\nextern \"C\" {\nextern int capget(cap_user_header_t header, const cap_user_data_t data);\n}\n\n#endif\n\nnamespace tensorpipe {\n\nnamespace {\n\n#ifdef __APPLE__\noptional<std::string> getBootIDInternal() {\n  std::array<char, 128> buf;\n\n  // See https://developer.apple.com/documentation/iokit/iokitlib_h for IOKitLib\n  // API documentation.\n  io_registry_entry_t ioRegistryRoot =\n      IORegistryEntryFromPath(kIOMainPortDefault, \"IOService:/\");\n  CFStringRef uuidCf = (CFStringRef)IORegistryEntryCreateCFProperty(\n      ioRegistryRoot, CFSTR(kIOPlatformUUIDKey), kCFAllocatorDefault, 0);\n  IOObjectRelease(ioRegistryRoot);\n  CFStringGetCString(uuidCf, buf.data(), buf.size(), kCFStringEncodingMacRoman);\n  CFRelease(uuidCf);\n\n  return std::string(buf.data());\n}\n\n#elif defined(__linux__)\noptional<std::string> getBootIDInternal() {\n  std::ifstream f{\"/proc/sys/kernel/random/boot_id\"};\n  if (!f.is_open()) {\n    return nullopt;\n  }\n  std::string v;\n  getline(f, v);\n  f.close();\n  return v;\n}\n\n// See namespaces(7).\nstd::string getPathForLinuxNamespace(LinuxNamespace ns) {\n  std::ostringstream oss;\n  oss << \"/proc/self/ns/\";\n  switch (ns) {\n    case LinuxNamespace::kIpc:\n      oss << \"ipc\";\n      break;\n    case LinuxNamespace::kNet:\n      oss << \"net\";\n      break;\n    case LinuxNamespace::kPid:\n      oss << \"pid\";\n      break;\n    case LinuxNamespace::kUser:\n      oss << \"user\";\n      break;\n    default:\n      TP_THROW_ASSERT() << \"Unknown namespace\";\n  }\n  return oss.str();\n}\n\n#endif\n\n} // namespace\n\nstd::string tstampToStr(TimeStamp ts) {\n  if (ts == kInvalidTimeStamp) {\n    return 
\"NA\";\n  }\n  // print timestaps in microseconds.\n  constexpr TimeStamp kDiv = 1000u;\n  std::stringstream ss;\n  ss << std::setw(9) << std::setfill(' ') << ts / kDiv;\n  ss << \".\" << std::setw(3) << std::setfill('0') << ts % kDiv << \"us\";\n  return ss.str();\n}\n\noptional<std::string> getProcFsStr(const std::string& fileName, pid_t tid) {\n  std::ostringstream oss;\n  oss << \"/proc/\" << tid << \"/\" << fileName;\n  std::ifstream f{oss.str()};\n  if (!f.is_open()) {\n    return nullopt;\n  }\n  std::string v;\n  getline(f, v);\n  f.close();\n  return v;\n}\n\nstd::string removeBlankSpaces(std::string s) {\n  // Remove blanks.\n  s.erase(\n      std::remove_if(\n          s.begin(), s.end(), [](unsigned char c) { return std::isspace(c); }),\n      s.end());\n  return s;\n}\n\noptional<std::string> getBootID() {\n  static optional<std::string> bootID = getBootIDInternal();\n  return bootID;\n}\n\n#ifdef __APPLE__\n\n// OSX is a UNIX, so often we'd like some of our Linux backends to work there\n// too, but its lack of support for namespaces poses issues. However, that's\n// like saying that in OSX all processes are in the same namespace with respect\n// to all resources, so we pretend namespaces are supported, with a constant ID.\noptional<std::string> getLinuxNamespaceId(LinuxNamespace ns) {\n  return std::string();\n}\n\n#elif defined(__linux__)\n\n// According to namespaces(7):\n// > Each process has a /proc/[pid]/ns/ subdirectory containing one entry for\n// > each namespace [...]. 
If two processes are in the same namespace, then the\n// > device IDs and inode numbers of their /proc/[pid]/ns/xxx symbolic links\n// > will be the same; an application can check this using the stat.st_dev and\n// > stat.st_ino fields returned by stat(2).\noptional<std::string> getLinuxNamespaceId(LinuxNamespace ns) {\n  struct stat statInfo;\n  std::string procfsNamespacePath = getPathForLinuxNamespace(ns);\n  // First use lstat to stat the link itself, to ensure it's indeed a link.\n  int rv = ::lstat(procfsNamespacePath.c_str(), &statInfo);\n\n  if (rv < 0 && errno == ENOENT) {\n    // These files were first provided in Linux 3.0 (although some of them came\n    // later), however namespaces already existed before then, hence the only\n    // safe thing to do is assume all processes are in different namespaces.\n    return nullopt;\n  }\n  // Other errors, like access/permission ones, are unexpected.\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n\n  // Between Linux 3.0 and 3.7 these files were hard links. In Linux 3.8 they\n  // became symlinks and only then it became possible to identify namespaces\n  // through these files' inode numbers.\n  if (!S_ISLNK(statInfo.st_mode)) {\n    return nullopt;\n  }\n\n  // Then stat the \"file\" the link points to, as it's its inode we care about.\n  rv = ::stat(procfsNamespacePath.c_str(), &statInfo);\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n\n  // These fields are of types dev_t and ino_t, which I couldn't find described\n  // anywhere. 
They appear to be unsigned longs, but all we care about is that\n  // they are integers, so let's check that.\n  static_assert(std::is_integral<decltype(statInfo.st_dev)>::value, \"\");\n  static_assert(std::is_integral<decltype(statInfo.st_ino)>::value, \"\");\n  std::ostringstream oss;\n  oss << std::hex << statInfo.st_dev << '_' << statInfo.st_ino;\n  return oss.str();\n}\n\n// According to https://www.kernel.org/doc/Documentation/security/LSM.txt:\n// > A list of the active security modules can be found by reading\n// > /sys/kernel/security/lsm. This is a comma separated list [...].\noptional<std::vector<std::string>> getLinuxSecurityModules() {\n  std::ifstream f{\"/sys/kernel/security/lsm\"};\n  if (f.fail()) {\n    return nullopt;\n  }\n  // We shouldn't have to worry about an entirely empty file, as according to\n  // the doc \"[this list] will always include the capability module\".\n  std::vector<std::string> res;\n  while (!f.eof()) {\n    std::string lsm;\n    std::getline(f, lsm, ',');\n    TP_THROW_ASSERT_IF(f.fail());\n    res.push_back(std::move(lsm));\n  }\n  f.close();\n  TP_THROW_ASSERT_IF(f.fail());\n  return res;\n}\n\n// See ptrace(2) (the sections towards the end) and\n// https://www.kernel.org/doc/Documentation/security/Yama.txt\noptional<YamaPtraceScope> getYamaPtraceScope() {\n  std::ifstream f{\"/proc/sys/kernel/yama/ptrace_scope\"};\n  if (f.fail()) {\n    return nullopt;\n  }\n  int scope;\n  f >> scope;\n  TP_THROW_ASSERT_IF(f.fail());\n  f.close();\n  TP_THROW_ASSERT_IF(f.fail());\n  switch (scope) {\n    case 0:\n      return YamaPtraceScope::kClassicPtracePermissions;\n    case 1:\n      return YamaPtraceScope::kRestrictedPtrace;\n    case 2:\n      return YamaPtraceScope::kAdminOnlyAttach;\n    case 3:\n      return YamaPtraceScope::kNoAttach;\n    default:\n      TP_THROW_ASSERT() << \"Unrecognized YAMA ptrace scope: \" << scope;\n      // Dummy return to make the compiler happy.\n      return nullopt;\n  
}\n}\n\noptional<std::string> getPermittedCapabilitiesID() {\n  std::remove_pointer<cap_user_header_t>::type header;\n  std::array<std::remove_pointer<cap_user_data_t>::type, 2> data;\n\n  // At the time of writing there are three versions of the syscall supported\n  // by the kernel, and we're supposed to perform a \"handshake\" to agree on the\n  // latest version supported both by us and by the kernel. However, this is\n  // only needed if we want to support pre-2.6.26 kernels, which we don't. Hence\n  // we'll fail if the kernel doesn't support the latest version (v3). On the\n  // other hand there is no way to figure out if the kernel's version has\n  // advanced past the one we support. This will occur once there will be more\n  // than 64 capabilities, but given the current pace this shouldn't happen for\n  // quite a while. Such a limitation probably comes from the capability system\n  // being designed around querying for a specific capability (in which case a\n  // program only needs to support the syscall version where that capability was\n  // added); querying _all_ capabilities (as we do) is kinda out-of-scope.\n  header.version = 0x20080522;\n  header.pid = 0;\n\n  int rv = ::capget(&header, data.data());\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n\n  // We'll create a bitmask of the capabilities, and then return its hex.\n  uint64_t bitmask = static_cast<uint64_t>(data[0].permitted) |\n      (static_cast<uint64_t>(data[1].permitted) << 32);\n  std::ostringstream oss;\n  oss << std::hex << bitmask;\n  return oss.str();\n}\n\n#endif\n\nvoid setThreadName(std::string name) {\n#ifdef __linux__\n// In glibc this non-standard call was added in version 2.12, hence we guard it.\n#ifdef __GLIBC__\n#if ((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 12)))\n  pthread_setname_np(pthread_self(), name.c_str());\n#endif\n// In other standard libraries we didn't check yet, hence we always enable it.\n#else\n  pthread_setname_np(pthread_self(), 
name.c_str());\n#endif\n#endif\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/common/system.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <algorithm>\n#include <chrono>\n#include <fstream>\n#include <set>\n#include <sstream>\n#include <string>\n#include <vector>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\n\n//\n// TimeStamp is a 64 bit value representing\n// a high-resolution clock. It is usually\n// in nano-seconds or in TSC cycles.\n//\nusing TimeStamp = uint64_t;\nconstexpr TimeStamp kInvalidTimeStamp = std::numeric_limits<TimeStamp>::max();\n\nstd::string tstampToStr(TimeStamp ts);\n\n// std::chronos::duration to TSC.\ntemplate <class TDuration>\nTimeStamp durationToTimeStamp(TDuration d) {\n  auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(d).count();\n  if (ns < 0) {\n    TP_THROW_EINVAL() << \"Negative time durations are not valid\";\n  }\n  return static_cast<TimeStamp>(ns);\n}\n\n//\n// Useful math functions to work with CPU and binary integers\n//\n\n/// Is it a Power of 2?\nconstexpr bool isPow2(uint64_t n) noexcept {\n  return n > 0 && !((n - 1) & n);\n}\n\n/// Smallest power of 2 larger or equal to <n>.\nconstexpr uint32_t nextPow2(uint32_t n) noexcept {\n  --n;\n\n  n |= n >> 1;\n  n |= n >> 2;\n  n |= n >> 4;\n  n |= n >> 8;\n  n |= n >> 16;\n\n  return n + 1;\n}\n\n/// Smallest power of 2 larger or equal to <n>\nconstexpr uint64_t nextPow2(uint64_t n) noexcept {\n  --n;\n\n  n |= n >> 1;\n  n |= n >> 2;\n  n |= n >> 4;\n  n |= n >> 8;\n  n |= n >> 16;\n  n |= n >> 32;\n\n  return n + 1;\n}\n\n/// Largest power of 2 less or equal to <n>\nconstexpr uint64_t maxPow2LessEqualThan(uint64_t n) noexcept {\n  if (isPow2(n)) {\n    return n;\n  }\n  return nextPow2(n) >> 1;\n}\n\n// Return contents of 
/proc/sys/kernel/random/boot_id.\noptional<std::string> getBootID();\n\nenum class LinuxNamespace {\n  kIpc,\n  kNet,\n  kPid,\n  kUser,\n  // Add more entries as needed.\n};\n\n// Returns a string that uniquely identifies a namespace of a certain type.\n// It is only valid within the same machine and for that fixed type.\noptional<std::string> getLinuxNamespaceId(LinuxNamespace ns);\n\n// Returns the names of the active Linux Security Modules, in the order in which\n// they are employed by the kernel. The names could be arbitrary (as third-party\n// LSMs could be in use) but contain values like \"capability\", \"apparmor\",\n// \"yama\", \"lockdown\", ...\noptional<std::vector<std::string>> getLinuxSecurityModules();\n\nenum class YamaPtraceScope {\n  kClassicPtracePermissions,\n  kRestrictedPtrace,\n  kAdminOnlyAttach,\n  kNoAttach,\n};\n\n// YAMA is a Linux Security Module that specifically targets ptrace by locking\n// down a process so it can only be targeted by its ancestors or by processes\n// that it specifically selects. However YAMA can be disabled, or made even\n// stricter. This function returns precisely what level YAMA is operating at.\noptional<YamaPtraceScope> getYamaPtraceScope();\n\n// Return a representation of the set of permitted capabilities of the process.\n// We're talking about Linux kernel capabilities, see capabilities(7).\noptional<std::string> getPermittedCapabilitiesID();\n\n// Set the name of the current thread, if possible. Use only for debugging.\nvoid setThreadName(std::string name);\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/config.h.in",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#cmakedefine01 TENSORPIPE_HAS_SHM_TRANSPORT\n#cmakedefine01 TENSORPIPE_HAS_IBV_TRANSPORT\n\n#cmakedefine01 TENSORPIPE_HAS_CMA_CHANNEL\n"
  },
  {
    "path": "tensorpipe/config_cuda.h.in",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#cmakedefine01 TENSORPIPE_HAS_CUDA_IPC_CHANNEL\n#cmakedefine01 TENSORPIPE_HAS_CUDA_GDR_CHANNEL\n"
  },
  {
    "path": "tensorpipe/core/context.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/core/context.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/core/context_impl.h>\n\nnamespace tensorpipe {\n\nContext::Context(ContextOptions opts)\n    : impl_(std::make_shared<ContextImpl>(std::move(opts))) {\n  impl_->init();\n}\n\nvoid Context::registerTransport(\n    int64_t priority,\n    std::string transport,\n    std::shared_ptr<transport::Context> context) {\n  impl_->registerTransport(priority, std::move(transport), std::move(context));\n}\n\nvoid Context::registerChannel(\n    int64_t priority,\n    std::string channel,\n    std::shared_ptr<channel::Context> context) {\n  impl_->registerChannel(priority, std::move(channel), std::move(context));\n}\n\nstd::shared_ptr<Listener> Context::listen(\n    const std::vector<std::string>& urls) {\n  return impl_->listen(urls);\n}\n\nstd::shared_ptr<Pipe> Context::connect(\n    const std::string& url,\n    PipeOptions opts) {\n  return impl_->connect(url, std::move(opts));\n}\n\nvoid Context::close() {\n  impl_->close();\n}\n\nvoid Context::join() {\n  impl_->join();\n}\n\nContext::~Context() {\n  join();\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/context.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/transport/context.h>\n\n#include <tensorpipe/channel/context.h>\n\nnamespace tensorpipe {\n\nclass ContextImpl;\nclass Listener;\nclass Pipe;\n\nclass ContextOptions {\n public:\n  // The name should be a semantically meaningful description of this context.\n  // It will only be used for logging and debugging purposes, to identify the\n  // endpoints of a pipe.\n  ContextOptions&& name(std::string name) && {\n    name_ = std::move(name);\n    return std::move(*this);\n  }\n\n private:\n  std::string name_;\n\n  friend ContextImpl;\n};\n\nclass PipeOptions {\n public:\n  // The name should be a semantically meaningful description of the context\n  // that the pipe is connecting to. 
It will only be used for logging and\n  // debugging purposes, to identify the endpoints of a pipe.\n  PipeOptions&& remoteName(std::string remoteName) && {\n    remoteName_ = std::move(remoteName);\n    return std::move(*this);\n  }\n\n private:\n  std::string remoteName_;\n\n  friend ContextImpl;\n};\n\nclass Context final {\n public:\n  explicit Context(ContextOptions opts = ContextOptions());\n\n  void registerTransport(\n      int64_t priority,\n      std::string transport,\n      std::shared_ptr<transport::Context> context);\n\n  void registerChannel(\n      int64_t priority,\n      std::string channel,\n      std::shared_ptr<channel::Context> context);\n\n  std::shared_ptr<Listener> listen(const std::vector<std::string>& urls);\n\n  std::shared_ptr<Pipe> connect(\n      const std::string& url,\n      PipeOptions opts = PipeOptions());\n\n  // Put the context in a terminal state, in turn closing all of its pipes and\n  // listeners, and release its resources. This may be done asynchronously, in\n  // background.\n  void close();\n\n  // Wait for all resources to be released and all background activity to stop.\n  void join();\n\n  ~Context();\n\n private:\n  // The implementation is managed by a shared_ptr because each child object\n  // will also hold a shared_ptr to it. However, its lifetime is tied to the one\n  // of this public object since when the latter is destroyed the implementation\n  // is closed and joined.\n  const std::shared_ptr<ContextImpl> impl_;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/core/context_impl.h>\n\n#include <sys/types.h>\n#include <unistd.h>\n\n#include <atomic>\n#include <memory>\n#include <string>\n#include <tuple>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/queue.h>\n#include <tensorpipe/core/error.h>\n#include <tensorpipe/core/listener.h>\n#include <tensorpipe/core/listener_impl.h>\n#include <tensorpipe/core/pipe.h>\n#include <tensorpipe/core/pipe_impl.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\n\nnamespace {\n\nstd::atomic<uint64_t> contextCouter{0};\n\nstd::string createContextId() {\n  // Should we use argv[0] instead of the PID? 
It may be more semantically\n  // meaningful and consistent across runs, but it may not be unique...\n  // Also, should we add the hostname/the IP address in case the logs from\n  // different hosts are merged into a single stream?\n  // Eventually we'll have to replace getpid with something more portable.\n  // Libuv offers a cross-platform function to get the process ID.\n  return std::to_string(getpid()) + \":c\" + std::to_string(contextCouter++);\n}\n\n} // namespace\n\nContextImpl::ContextImpl(ContextOptions opts)\n    : id_(createContextId()), name_(std::move(opts.name_)) {\n  TP_VLOG(1) << \"Context \" << id_ << \" created\";\n  if (name_ != \"\") {\n    TP_VLOG(1) << \"Context \" << id_ << \" aliased as \" << name_;\n    id_ = name_;\n  }\n}\n\nvoid ContextImpl::init() {\n  deferToLoop([this]() { initFromLoop(); });\n}\n\nvoid ContextImpl::initFromLoop() {}\n\nvoid ContextImpl::registerTransport(\n    int64_t priority,\n    std::string transport,\n    std::shared_ptr<transport::Context> context) {\n  TP_THROW_ASSERT_IF(transport.empty());\n  TP_THROW_ASSERT_IF(transports_.find(transport) != transports_.end())\n      << \"transport \" << transport << \" already registered\";\n  TP_THROW_ASSERT_IF(\n      transportsByPriority_.find(-priority) != transportsByPriority_.end())\n      << \"transport with priority \" << priority << \" already registered\";\n  if (!context->isViable()) {\n    TP_VLOG(1) << \"Context \" << id_ << \" is not registering transport \"\n               << transport << \" because it is not viable\";\n    return;\n  }\n  TP_VLOG(1) << \"Context \" << id_ << \" is registering transport \" << transport;\n  context->setId(id_ + \".tr_\" + transport);\n  transports_.emplace(transport, context);\n  // Reverse the priority, as the pipe will pick the *first* available transport\n  // it can find in the ordered map, so higher priorities should come first.\n  transportsByPriority_.emplace(-priority, std::make_tuple(transport, context));\n}\n\nvoid 
ContextImpl::registerChannel(\n    int64_t priority,\n    std::string channel,\n    std::shared_ptr<channel::Context> context) {\n  TP_THROW_ASSERT_IF(channel.empty());\n  TP_THROW_ASSERT_IF(channels_.find(channel) != channels_.end())\n      << \"channel \" << channel << \" already registered\";\n  TP_THROW_ASSERT_IF(\n      channelsByPriority_.find(-priority) != channelsByPriority_.end())\n      << \"channel with priority \" << priority << \" already registered\";\n  if (!context->isViable()) {\n    TP_VLOG(1) << \"Context \" << id_ << \" is not registering channel \" << channel\n               << \" because it is not viable\";\n    return;\n  }\n  TP_VLOG(1) << \"Context \" << id_ << \" is registering channel \" << channel;\n  context->setId(id_ + \".ch_\" + channel);\n  channels_.emplace(channel, context);\n  // Reverse the priority, as the pipe will pick the *first* available channel\n  // it can find in the ordered map, so higher priorities should come first.\n  channelsByPriority_.emplace(-priority, std::make_tuple(channel, context));\n}\n\nstd::shared_ptr<Listener> ContextImpl::listen(\n    const std::vector<std::string>& urls) {\n  std::string listenerId =\n      id_ + \"[l\" + std::to_string(listenerCounter_++) + \"]\";\n  TP_VLOG(1) << \"Context \" << id_ << \" is opening listener \" << listenerId;\n  return std::make_shared<Listener>(\n      Listener::ConstructorToken(),\n      shared_from_this(),\n      std::move(listenerId),\n      urls);\n}\n\nstd::shared_ptr<Pipe> ContextImpl::connect(\n    const std::string& url,\n    PipeOptions opts) {\n  std::string pipeId = id_ + \".p\" + std::to_string(pipeCounter_++);\n  TP_VLOG(1) << \"Context \" << id_ << \" is opening pipe \" << pipeId;\n  std::string remoteContextName = std::move(opts.remoteName_);\n  if (remoteContextName != \"\") {\n    std::string aliasPipeId = id_ + \"_to_\" + remoteContextName;\n    TP_VLOG(1) << \"Pipe \" << pipeId << \" aliased as \" << aliasPipeId;\n    pipeId = 
std::move(aliasPipeId);\n  }\n  return std::make_shared<Pipe>(\n      Pipe::ConstructorToken(),\n      shared_from_this(),\n      std::move(pipeId),\n      std::move(remoteContextName),\n      url);\n}\n\nstd::shared_ptr<transport::Context> ContextImpl::getTransport(\n    const std::string& transport) {\n  auto iter = transports_.find(transport);\n  if (iter == transports_.end()) {\n    TP_THROW_EINVAL() << \"unsupported transport \" << transport;\n  }\n  return iter->second;\n}\n\nstd::shared_ptr<channel::Context> ContextImpl::getChannel(\n    const std::string& channel) {\n  auto iter = channels_.find(channel);\n  if (iter == channels_.end()) {\n    TP_THROW_EINVAL() << \"unsupported channel \" << channel;\n  }\n  return iter->second;\n}\n\nconst ContextImpl::TOrderedTransports& ContextImpl::getOrderedTransports() {\n  return transportsByPriority_;\n}\n\nconst ContextImpl::TOrderedChannels& ContextImpl::getOrderedChannels() {\n  return channelsByPriority_;\n}\n\nconst std::string& ContextImpl::getName() {\n  return name_;\n}\n\nvoid ContextImpl::enroll(ListenerImpl& listener) {\n  TP_DCHECK(inLoop());\n  bool wasInserted;\n  std::tie(std::ignore, wasInserted) =\n      listeners_.emplace(&listener, listener.shared_from_this());\n  TP_DCHECK(wasInserted);\n}\n\nvoid ContextImpl::enroll(PipeImpl& pipe) {\n  TP_DCHECK(inLoop());\n  bool wasInserted;\n  std::tie(std::ignore, wasInserted) =\n      pipes_.emplace(&pipe, pipe.shared_from_this());\n  TP_DCHECK(wasInserted);\n}\n\nvoid ContextImpl::unenroll(ListenerImpl& listener) {\n  TP_DCHECK(inLoop());\n  auto numRemoved = listeners_.erase(&listener);\n  TP_DCHECK_EQ(numRemoved, 1);\n}\n\nvoid ContextImpl::unenroll(PipeImpl& pipe) {\n  TP_DCHECK(inLoop());\n  auto numRemoved = pipes_.erase(&pipe);\n  TP_DCHECK_EQ(numRemoved, 1);\n}\n\nbool ContextImpl::closed() {\n  TP_DCHECK(inLoop());\n  return error_;\n}\n\nvoid ContextImpl::deferToLoop(TTask fn) {\n  loop_.deferToLoop(std::move(fn));\n}\n\nbool 
ContextImpl::inLoop() const {\n  return loop_.inLoop();\n}\n\nvoid ContextImpl::close() {\n  deferToLoop([this]() { closeFromLoop(); });\n}\n\nvoid ContextImpl::closeFromLoop() {\n  TP_DCHECK(inLoop());\n  TP_VLOG(1) << \"Context \" << id_ << \" is closing\";\n  setError(TP_CREATE_ERROR(ContextClosedError));\n  TP_VLOG(1) << \"Context \" << id_ << \" done closing\";\n}\n\nvoid ContextImpl::setError(Error error) {\n  // Don't overwrite an error that's already set.\n  if (error_ || !error) {\n    return;\n  }\n\n  error_ = std::move(error);\n\n  handleError();\n}\n\nvoid ContextImpl::handleError() {\n  TP_DCHECK(inLoop());\n  TP_VLOG(5) << \"Context \" << id_ << \" is handling error \" << error_.what();\n\n  // Make a copy as they could unenroll themselves inline.\n  auto listenersCopy = listeners_;\n  auto pipesCopy = pipes_;\n  // We call closeFromLoop, rather than just close, because we need these\n  // objects to transition _immediately_ to error, \"atomically\". If we just\n  // deferred closing to later, this could come after some already-enqueued\n  // operations that could try to access the context, which would be closed,\n  // and this could fail.\n  for (auto& iter : listenersCopy) {\n    iter.second->closeFromLoop();\n  }\n  for (auto& iter : pipesCopy) {\n    iter.second->closeFromLoop();\n  }\n\n  for (auto& iter : transports_) {\n    iter.second->close();\n  }\n  for (auto& iter : channels_) {\n    iter.second->close();\n  }\n}\n\nvoid ContextImpl::join() {\n  close();\n\n  if (!joined_.exchange(true)) {\n    TP_VLOG(1) << \"Context \" << id_ << \" is joining\";\n\n    // As closing is deferred to the loop, we must wait for close to be actually\n    // called before we join, to avoid race conditions. 
For this, we defer\n    // another task to the loop, which we know will run after the closing, and\n    // then we wait for that task to be run.\n    std::promise<void> hasClosed;\n    deferToLoop([&]() { hasClosed.set_value(); });\n    hasClosed.get_future().wait();\n\n    for (auto& iter : transports_) {\n      iter.second->join();\n    }\n    for (auto& iter : channels_) {\n      iter.second->join();\n    }\n\n    TP_VLOG(1) << \"Context \" << id_ << \" done joining\";\n\n    TP_DCHECK(listeners_.empty());\n    TP_DCHECK(pipes_.empty());\n  }\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <map>\n#include <memory>\n#include <string>\n#include <tuple>\n#include <unordered_map>\n#include <vector>\n\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/core/context.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\n\nclass ListenerImpl;\nclass PipeImpl;\n\nclass ContextImpl final : public virtual DeferredExecutor,\n                          public std::enable_shared_from_this<ContextImpl> {\n public:\n  explicit ContextImpl(ContextOptions opts);\n\n  void init();\n\n  void registerTransport(\n      int64_t priority,\n      std::string transport,\n      std::shared_ptr<transport::Context> context);\n\n  void registerChannel(\n      int64_t priority,\n      std::string channel,\n      std::shared_ptr<channel::Context> context);\n\n  std::shared_ptr<Listener> listen(const std::vector<std::string>& urls);\n\n  std::shared_ptr<Pipe> connect(const std::string& url, PipeOptions opts);\n\n  std::shared_ptr<transport::Context> getTransport(\n      const std::string& transport);\n  std::shared_ptr<channel::Context> getChannel(const std::string& channel);\n\n  using TOrderedTransports = std::map<\n      int64_t,\n      std::tuple<std::string, std::shared_ptr<transport::Context>>>;\n\n  const TOrderedTransports& getOrderedTransports();\n\n  using TOrderedChannels = std::\n      map<int64_t, std::tuple<std::string, std::shared_ptr<channel::Context>>>;\n\n  const TOrderedChannels& getOrderedChannels();\n\n  // Return the name given to the context's constructor. 
It will be retrieved\n  // by the pipes and listener in order to attach it to logged messages.\n  const std::string& getName();\n\n  // Enrolling dependent objects (listeners and pipes) causes them to be kept\n  // alive for as long as the context exists. These objects should enroll\n  // themselves as soon as they're created (in their initFromLoop method) and\n  // unenroll themselves after they've completed handling an error (either right\n  // in the handleError method or in a subsequent callback). The context, on the\n  // other hand, should avoid terminating (i.e., complete joining) until all\n  // objects have unenrolled themselves.\n  void enroll(ListenerImpl& listener);\n  void enroll(PipeImpl& pipe);\n  void unenroll(ListenerImpl& listener);\n  void unenroll(PipeImpl& pipe);\n\n  // Return whether the context is in a closed state. To avoid race conditions,\n  // this must be called from within the loop.\n  bool closed();\n\n  // Implement DeferredExecutor interface.\n  void deferToLoop(TTask fn) override;\n  bool inLoop() const override;\n\n  void close();\n\n  void join();\n\n private:\n  OnDemandDeferredExecutor loop_;\n\n  Error error_{Error::kSuccess};\n\n  std::atomic<bool> joined_{false};\n\n  // An identifier for the context, either consisting of the user-provided name\n  // for this context (see below) or, by default, composed of unique information\n  // about the host and process, combined with an increasing sequence number. It\n  // will be used as a prefix for the identifiers of listeners and pipes. All of\n  // them will only be used for logging and debugging purposes.\n  std::string id_;\n\n  // Sequence numbers for the listeners and pipes created by this context, used\n  // to create their identifiers based off this context's identifier. 
They will\n  // only be used for logging and debugging.\n  std::atomic<uint64_t> listenerCounter_{0};\n  std::atomic<uint64_t> pipeCounter_{0};\n\n  // Store shared_ptrs to dependent objects that have enrolled themselves to\n  // keep them alive. We use a map, indexed by raw pointers, rather than a set\n  // of shared_ptrs so that we can erase objects without them having to create\n  // a fresh shared_ptr just for that.\n  std::unordered_map<ListenerImpl*, std::shared_ptr<ListenerImpl>> listeners_;\n  std::unordered_map<PipeImpl*, std::shared_ptr<PipeImpl>> pipes_;\n\n  // A user-provided name for this context which should be semantically\n  // meaningful. It will only be used for logging and debugging purposes, to\n  // identify the endpoints of a pipe.\n  std::string name_;\n\n  std::unordered_map<std::string, std::shared_ptr<transport::Context>>\n      transports_;\n\n  using TContextMap =\n      std::unordered_map<std::string, std::shared_ptr<channel::Context>>;\n  TContextMap channels_;\n\n  TOrderedTransports transportsByPriority_;\n\n  TOrderedChannels channelsByPriority_;\n\n  CallbackWrapper<ContextImpl> callbackWrapper_{*this, *this};\n\n  void initFromLoop();\n  void closeFromLoop();\n  void setError(Error error);\n  void handleError();\n\n  template <typename T>\n  friend class CallbackWrapper;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/error.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/core/error.h>\n\n#include <sstream>\n\nnamespace tensorpipe {\n\nstd::string LogicError::what() const {\n  std::ostringstream ss;\n  ss << \"logic error: \" << reason_;\n  return ss.str();\n}\n\nstd::string ContextClosedError::what() const {\n  return \"context closed\";\n}\n\nstd::string ListenerClosedError::what() const {\n  return \"listener closed\";\n}\n\nstd::string PipeClosedError::what() const {\n  return \"pipe closed\";\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/error.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n\n#include <tensorpipe/common/error.h>\n\nnamespace tensorpipe {\n\nclass LogicError final : public BaseError {\n public:\n  explicit LogicError(std::string reason) : reason_(std::move(reason)) {}\n\n  std::string what() const override;\n\n private:\n  const std::string reason_;\n};\n\nclass ContextClosedError final : public BaseError {\n public:\n  explicit ContextClosedError() {}\n\n  std::string what() const override;\n};\n\nclass ListenerClosedError final : public BaseError {\n public:\n  explicit ListenerClosedError() {}\n\n  std::string what() const override;\n};\n\nclass PipeClosedError final : public BaseError {\n public:\n  explicit PipeClosedError() {}\n\n  std::string what() const override;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/listener.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/core/listener.h>\n\n#include <map>\n#include <memory>\n#include <string>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/core/listener_impl.h>\n\nnamespace tensorpipe {\n\nListener::Listener(\n    ConstructorToken /* unused */,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    const std::vector<std::string>& urls)\n    : impl_(std::make_shared<ListenerImpl>(\n          std::move(context),\n          std::move(id),\n          urls)) {\n  impl_->init();\n}\n\nvoid Listener::close() {\n  impl_->close();\n}\n\nListener::~Listener() {\n  close();\n}\n\nvoid Listener::accept(accept_callback_fn fn) {\n  impl_->accept(std::move(fn));\n}\n\nconst std::map<std::string, std::string>& Listener::addresses() const {\n  return impl_->addresses();\n}\n\nconst std::string& Listener::address(const std::string& transport) const {\n  return impl_->address(transport);\n}\n\nstd::string Listener::url(const std::string& transport) const {\n  return impl_->url(transport);\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/listener.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <map>\n#include <memory>\n#include <string>\n#include <vector>\n\n#include <tensorpipe/common/error.h>\n\nnamespace tensorpipe {\n\nclass ContextImpl;\nclass ListenerImpl;\nclass Pipe;\n\n// The listener.\n//\n// Listeners are used to produce pipes. Depending on the type of the\n// context, listeners may use a variety of addresses to listen on. For\n// example, for TCP/IP sockets they listen on an IPv4 or IPv6 address,\n// for Unix domain sockets they listen on a path, etcetera.\n//\n// A pipe can only be accepted from this listener after it has been\n// fully established. This means that both its connection and all its\n// side channels have been established.\n//\nclass Listener final {\n  // Use the passkey idiom to allow make_shared to call what should be a private\n  // constructor. See https://abseil.io/tips/134 for more information.\n  struct ConstructorToken {};\n\n public:\n  Listener(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      const std::vector<std::string>& urls);\n\n  //\n  // Entry points for user code\n  //\n\n  using accept_callback_fn =\n      std::function<void(const Error&, std::shared_ptr<Pipe>)>;\n\n  void accept(accept_callback_fn fn);\n\n  // Returns map with the materialized address of listeners by transport.\n  //\n  // If you don't bind a transport listener to a specific port or address, it\n  // may generate its address automatically. 
Then, in order to connect to the\n  // listener, the user must use a separate mechanism to communicate the\n  // materialized address to whoever wants to connect.\n  //\n  const std::map<std::string, std::string>& addresses() const;\n\n  // Returns materialized address for specific transport.\n  //\n  // See `addresses()` for more information.\n  //\n  const std::string& address(const std::string& transport) const;\n\n  // Returns URL with materialized address for specific transport.\n  //\n  // See `addresses()` for more information.\n  //\n  std::string url(const std::string& transport) const;\n\n  // Put the listener in a terminal state, aborting its pending operations and\n  // rejecting future ones, and release its resources. This may be carried out\n  // asynchronously, in background. Since the pipes may occasionally use the\n  // listener to open new connections, closing a listener may trigger errors\n  // in the pipes.\n  void close();\n\n  ~Listener();\n\n private:\n  // Using a shared_ptr allows us to detach the lifetime of the implementation\n  // from the public object's one and perform the destruction asynchronously.\n  const std::shared_ptr<ListenerImpl> impl_;\n\n  // Allow context to access constructor token.\n  friend ContextImpl;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/listener_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/core/listener_impl.h>\n\n#include <functional>\n#include <map>\n#include <memory>\n#include <string>\n#include <tuple>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/common/address.h>\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/core/context_impl.h>\n#include <tensorpipe/core/error.h>\n#include <tensorpipe/core/nop_types.h>\n#include <tensorpipe/core/pipe.h>\n#include <tensorpipe/core/pipe_impl.h>\n#include <tensorpipe/transport/connection.h>\n#include <tensorpipe/transport/listener.h>\n\nnamespace tensorpipe {\n\nListenerImpl::ListenerImpl(\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    const std::vector<std::string>& urls)\n    : context_(std::move(context)), id_(std::move(id)) {\n  for (const auto& url : urls) {\n    std::string transport;\n    std::string address;\n    std::tie(transport, address) = splitSchemeOfURL(url);\n    std::shared_ptr<transport::Context> context =\n        context_->getTransport(transport);\n    std::shared_ptr<transport::Listener> listener = context->listen(address);\n    listener->setId(id_ + \".tr_\" + transport);\n    addresses_.emplace(transport, listener->addr());\n    listeners_.emplace(transport, std::move(listener));\n  }\n}\n\nvoid ListenerImpl::init() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->initFromLoop(); });\n}\n\nvoid ListenerImpl::initFromLoop() {\n  TP_DCHECK(context_->inLoop());\n\n  if (context_->closed()) {\n    // Set the error without calling setError because we do not want to invoke\n    // handleError as it would find itself in a weird 
state (since the rest of\n    // initFromLoop wouldn't have been called).\n    error_ = TP_CREATE_ERROR(ListenerClosedError);\n    TP_VLOG(1) << \"Listener \" << id_ << \" is closing (without initing)\";\n    return;\n  }\n\n  context_->enroll(*this);\n\n  for (const auto& listener : listeners_) {\n    armListener(listener.first);\n  }\n}\n\nvoid ListenerImpl::close() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->closeFromLoop(); });\n}\n\nvoid ListenerImpl::closeFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(1) << \"Listener \" << id_ << \" is closing\";\n  setError(TP_CREATE_ERROR(ListenerClosedError));\n}\n\n//\n// Entry points for user code\n//\n\nvoid ListenerImpl::accept(accept_callback_fn fn) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, fn{std::move(fn)}]() mutable {\n        impl->acceptFromLoop(std::move(fn));\n      });\n}\n\nvoid ListenerImpl::acceptFromLoop(accept_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  uint64_t sequenceNumber = nextPipeBeingAccepted_++;\n  TP_VLOG(1) << \"Listener \" << id_ << \" received an accept request (#\"\n             << sequenceNumber << \")\";\n\n  fn = [this, sequenceNumber, fn{std::move(fn)}](\n           const Error& error, std::shared_ptr<Pipe> pipe) {\n    TP_DCHECK_EQ(sequenceNumber, nextAcceptCallbackToCall_++);\n    TP_VLOG(1) << \"Listener \" << id_ << \" is calling an accept callback (#\"\n               << sequenceNumber << \")\";\n    fn(error, std::move(pipe));\n    TP_VLOG(1) << \"Listener \" << id_ << \" done calling an accept callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    fn(error_, std::shared_ptr<Pipe>());\n    return;\n  }\n\n  acceptCallback_.arm(std::move(fn));\n}\n\nconst std::map<std::string, std::string>& ListenerImpl::addresses() const {\n  // As this is an immutable member (after it has been initialized in\n  // the constructor), we'll access it without deferring to the loop.\n  
return addresses_;\n}\n\nconst std::string& ListenerImpl::address(const std::string& transport) const {\n  // As this is an immutable member (after it has been initialized in\n  // the constructor), we'll access it without deferring to the loop.\n  const auto it = addresses_.find(transport);\n  TP_THROW_ASSERT_IF(it == addresses_.end())\n      << \": transport '\" << transport << \"' not in use by this listener.\";\n  return it->second;\n}\n\nstd::string ListenerImpl::url(const std::string& transport) const {\n  // As this is an immutable member (after it has been initialized in\n  // the constructor), we'll access it without deferring to the loop.\n  return transport + \"://\" + address(transport);\n}\n\n//\n// Entry points for internal code\n//\n\nuint64_t ListenerImpl::registerConnectionRequest(\n    connection_request_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  uint64_t registrationId = nextConnectionRequestRegistrationId_++;\n\n  TP_VLOG(1) << \"Listener \" << id_\n             << \" received a connection request registration (#\"\n             << registrationId << \")\";\n\n  fn = [this, registrationId, fn{std::move(fn)}](\n           const Error& error,\n           std::string transport,\n           std::shared_ptr<transport::Connection> connection) {\n    TP_VLOG(1) << \"Listener \" << id_\n               << \" is calling a connection request registration callback (#\"\n               << registrationId << \")\";\n    fn(error, std::move(transport), std::move(connection));\n    TP_VLOG(1) << \"Listener \" << id_\n               << \" done calling a connection request registration callback (#\"\n               << registrationId << \")\";\n  };\n\n  if (error_) {\n    fn(error_, std::string(), std::shared_ptr<transport::Connection>());\n  } else {\n    connectionRequestRegistrations_.emplace(registrationId, std::move(fn));\n  }\n\n  return registrationId;\n}\n\nvoid ListenerImpl::unregisterConnectionRequest(uint64_t registrationId) {\n  
TP_DCHECK(context_->inLoop());\n\n  TP_VLOG(1) << \"Listener \" << id_\n             << \" received a connection request de-registration (#\"\n             << registrationId << \")\";\n\n  connectionRequestRegistrations_.erase(registrationId);\n}\n\n//\n// Error handling\n//\n\nvoid ListenerImpl::setError(Error error) {\n  // Don't overwrite an error that's already set.\n  if (error_ || !error) {\n    return;\n  }\n\n  error_ = std::move(error);\n\n  handleError();\n}\n\nvoid ListenerImpl::handleError() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(2) << \"Listener \" << id_ << \" is handling error \" << error_.what();\n\n  acceptCallback_.triggerAll([&]() {\n    return std::make_tuple(std::cref(error_), std::shared_ptr<Pipe>());\n  });\n  for (auto& iter : connectionRequestRegistrations_) {\n    connection_request_callback_fn fn = std::move(iter.second);\n    fn(error_, std::string(), std::shared_ptr<transport::Connection>());\n  }\n  connectionRequestRegistrations_.clear();\n\n  for (const auto& listener : listeners_) {\n    listener.second->close();\n  }\n\n  for (const auto& connection : connectionsWaitingForHello_) {\n    connection->close();\n  }\n  connectionsWaitingForHello_.clear();\n\n  context_->unenroll(*this);\n}\n\n//\n// Everything else\n//\n\nvoid ListenerImpl::onAccept(\n    std::string transport,\n    std::shared_ptr<transport::Connection> connection) {\n  TP_DCHECK(context_->inLoop());\n  // Keep it alive until we figure out what to do with it.\n  connectionsWaitingForHello_.insert(connection);\n  auto nopHolderIn = std::make_shared<NopHolder<Packet>>();\n  TP_VLOG(3) << \"Listener \" << id_\n             << \" is reading nop object (spontaneous or requested connection)\";\n  connection->read(\n      *nopHolderIn,\n      callbackWrapper_([nopHolderIn,\n                        transport{std::move(transport)},\n                        connection](ListenerImpl& impl) mutable {\n        TP_VLOG(3)\n            << \"Listener \" << impl.id_\n          
  << \" done reading nop object (spontaneous or requested connection)\";\n        if (impl.error_) {\n          return;\n        }\n        impl.connectionsWaitingForHello_.erase(connection);\n        impl.onConnectionHelloRead(\n            std::move(transport),\n            std::move(connection),\n            nopHolderIn->getObject());\n      }));\n}\n\nvoid ListenerImpl::armListener(std::string transport) {\n  TP_DCHECK(context_->inLoop());\n  auto iter = listeners_.find(transport);\n  if (iter == listeners_.end()) {\n    TP_THROW_EINVAL() << \"unsupported transport \" << transport;\n  }\n  auto transportListener = iter->second;\n  TP_VLOG(3) << \"Listener \" << id_ << \" is accepting connection on transport \"\n             << transport;\n  transportListener->accept(\n      callbackWrapper_([transport](\n                           ListenerImpl& impl,\n                           std::shared_ptr<transport::Connection> connection) {\n        TP_VLOG(3) << \"Listener \" << impl.id_\n                   << \" done accepting connection on transport \" << transport;\n        if (impl.error_) {\n          return;\n        }\n        impl.onAccept(transport, std::move(connection));\n        impl.armListener(transport);\n      }));\n}\n\nvoid ListenerImpl::onConnectionHelloRead(\n    std::string transport,\n    std::shared_ptr<transport::Connection> connection,\n    const Packet& nopPacketIn) {\n  TP_DCHECK(context_->inLoop());\n  if (nopPacketIn.is<SpontaneousConnection>()) {\n    const SpontaneousConnection& nopSpontaneousConnection =\n        *nopPacketIn.get<SpontaneousConnection>();\n    TP_VLOG(3) << \"Listener \" << id_ << \" got spontaneous connection\";\n    std::string pipeId = id_ + \".p\" + std::to_string(pipeCounter_++);\n    TP_VLOG(1) << \"Listener \" << id_ << \" is opening pipe \" << pipeId;\n    const std::string& remoteContextName = nopSpontaneousConnection.contextName;\n    if (remoteContextName != \"\") {\n      std::string aliasPipeId = id_ + 
\"_from_\" + remoteContextName;\n      TP_VLOG(1) << \"Pipe \" << pipeId << \" aliased as \" << aliasPipeId;\n      pipeId = std::move(aliasPipeId);\n    }\n    auto pipe = std::make_shared<PipeImpl>(\n        context_,\n        shared_from_this(),\n        std::move(pipeId),\n        remoteContextName,\n        std::move(transport),\n        std::move(connection));\n    // We initialize the pipe from the loop immediately, inline, because the\n    // initialization of a pipe accepted by a listener happens partly in the\n    // listener and partly in the pipe's initFromLoop, and we need these two\n    // steps to happen \"atomically\" to make it impossible for an error to occur\n    // in between.\n    pipe->initFromLoop();\n    acceptCallback_.trigger(\n        Error::kSuccess,\n        std::make_shared<Pipe>(Pipe::ConstructorToken(), std::move(pipe)));\n  } else if (nopPacketIn.is<RequestedConnection>()) {\n    const RequestedConnection& nopRequestedConnection =\n        *nopPacketIn.get<RequestedConnection>();\n    uint64_t registrationId = nopRequestedConnection.registrationId;\n    TP_VLOG(3) << \"Listener \" << id_ << \" got requested connection (#\"\n               << registrationId << \")\";\n    auto iter = connectionRequestRegistrations_.find(registrationId);\n    // The connection request may have already been deregistered, for example\n    // because the pipe may have been closed.\n    if (iter != connectionRequestRegistrations_.end()) {\n      auto fn = std::move(iter->second);\n      connectionRequestRegistrations_.erase(iter);\n      fn(Error::kSuccess, std::move(transport), std::move(connection));\n    }\n  } else {\n    TP_LOG_ERROR() << \"packet contained unknown content: \"\n                   << nopPacketIn.index();\n  }\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/listener_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <functional>\n#include <map>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <unordered_set>\n#include <vector>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/core/context_impl.h>\n#include <tensorpipe/core/listener.h>\n#include <tensorpipe/core/nop_types.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\n\nclass ContextImpl;\n\nclass ListenerImpl final : public std::enable_shared_from_this<ListenerImpl> {\n public:\n  ListenerImpl(\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      const std::vector<std::string>& urls);\n\n  // Called by the listener's constructor.\n  void init();\n\n  using accept_callback_fn = Listener::accept_callback_fn;\n\n  void accept(accept_callback_fn fn);\n\n  const std::map<std::string, std::string>& addresses() const;\n\n  const std::string& address(const std::string& transport) const;\n\n  std::string url(const std::string& transport) const;\n\n  using connection_request_callback_fn = std::function<\n      void(const Error&, std::string, std::shared_ptr<transport::Connection>)>;\n\n  uint64_t registerConnectionRequest(connection_request_callback_fn fn);\n  void unregisterConnectionRequest(uint64_t registrationId);\n\n  void close();\n\n private:\n  void acceptFromLoop(accept_callback_fn fn);\n\n  void closeFromLoop();\n\n  Error error_{Error::kSuccess};\n\n  std::shared_ptr<ContextImpl> context_;\n\n  // An identifier for the listener, composed of the identifier for the context,\n  // combined with an increasing sequence number. 
It will be used as a prefix\n  // for the identifiers of pipes. All of them will only be used for logging and\n  // debugging purposes.\n  std::string id_;\n\n  // Sequence numbers for the pipes created by this listener, used to create\n  // their identifiers based off this listener's identifier. They will only be\n  // used for logging and debugging.\n  std::atomic<uint64_t> pipeCounter_{0};\n\n  std::unordered_map<std::string, std::shared_ptr<transport::Listener>>\n      listeners_;\n  std::map<std::string, std::string> addresses_;\n\n  // A sequence number for the calls to accept.\n  uint64_t nextPipeBeingAccepted_{0};\n\n  // A sequence number for the invocations of the callbacks of accept.\n  uint64_t nextAcceptCallbackToCall_{0};\n\n  RearmableCallback<const Error&, std::shared_ptr<Pipe>> acceptCallback_;\n\n  // Needed to keep them alive.\n  std::unordered_set<std::shared_ptr<transport::Connection>>\n      connectionsWaitingForHello_;\n\n  uint64_t nextConnectionRequestRegistrationId_{0};\n\n  // FIXME Consider using a (ordered) map, because keys are IDs which are\n  // generated in sequence and thus we can do a quick (but partial) check of\n  // whether a callback is in the map by comparing its ID with the smallest\n  // and largest key, which in an ordered map are the first and last item.\n  std::unordered_map<uint64_t, connection_request_callback_fn>\n      connectionRequestRegistrations_;\n\n  //\n  // Initialization\n  //\n\n  void initFromLoop();\n\n  //\n  // Helpers to prepare callbacks from transports\n  //\n\n  CallbackWrapper<ListenerImpl> callbackWrapper_{*this, *this->context_};\n\n  //\n  // Error handling\n  //\n\n  void setError(Error error);\n\n  void handleError();\n\n  //\n  // Everything else\n  //\n\n  void armListener(std::string transport);\n  void onAccept(\n      std::string transport,\n      std::shared_ptr<transport::Connection> connection);\n  void onConnectionHelloRead(\n      std::string transport,\n      
std::shared_ptr<transport::Connection> connection,\n      const Packet& nopPacketIn);\n\n  template <typename T>\n  friend class CallbackWrapper;\n\n  // Contexts do sometimes need to call directly into closeFromLoop, in order to\n  // make sure that some of their operations can happen \"atomically\" on the\n  // connection, without possibly other operations occurring in between (e.g.,\n  // an error).\n  friend ContextImpl;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/message.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n#include <string>\n#include <vector>\n\n#include <tensorpipe/common/buffer.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\n\n// Messages consist of a primary buffer and zero or more separate\n// buffers. The primary buffer is always a host-side memory region that\n// contains a serialized version of the message we're dealing with. This\n// serialized message, in turn, may have references to the separate\n// buffers that accompany the primary buffer. These separate buffers may\n// point to any type of memory, host-side or device-side.\n//\nclass Message final {\n public:\n  std::string metadata;\n\n  struct Payload {\n    void* data{nullptr};\n    size_t length{0};\n\n    // Users may include arbitrary metadata in the following fields.\n    // This may contain allocation hints for the receiver, for example.\n    std::string metadata;\n  };\n\n  // Holds the payloads that are transferred over the primary connection.\n  std::vector<Payload> payloads;\n\n  struct Tensor {\n    tensorpipe::Buffer buffer;\n    size_t length{0};\n\n    // Users may optionally specify the target device, on which the receiver\n    // should allocate memory for this tensor. 
If left unset, the receiver will\n    // choose one at their convenience.\n    optional<Device> targetDevice;\n\n    // Users may include arbitrary metadata in the following field.\n    // This may contain allocation hints for the receiver, for example.\n    std::string metadata;\n  };\n\n  // Holds the tensors that are offered to the side channels.\n  std::vector<Tensor> tensors;\n};\n\n// Descriptors consist of metadata required by the receiver to allocate memory\n// for an incoming message.\nclass Descriptor final {\n public:\n  std::string metadata;\n\n  struct Payload {\n    size_t length{0};\n    std::string metadata;\n  };\n  std::vector<Payload> payloads;\n\n  struct Tensor {\n    size_t length{0};\n\n    // This is the sender-side device from which this tensor is being sent.\n    Device sourceDevice;\n\n    // The sender may optionally specify a target device, in which case the\n    // receiver must allocate memory for this tensor on the specified device.\n    optional<Device> targetDevice;\n\n    std::string metadata;\n  };\n  std::vector<Tensor> tensors;\n};\n\n// Allocations consist of actual memory allocations provided by the receiver for\n// an incoming message. They must match the length and target devices specified\n// in the corresponding Descriptor.\nclass Allocation final {\n public:\n  struct Payload {\n    void* data{nullptr};\n  };\n  std::vector<Payload> payloads;\n\n  struct Tensor {\n    tensorpipe::Buffer buffer;\n  };\n  std::vector<Tensor> tensors;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/nop_types.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n#include <unordered_map>\n#include <vector>\n\n#include <nop/serializer.h>\n#include <nop/structure.h>\n#include <nop/types/optional.h>\n#include <nop/types/variant.h>\n\n#include <tensorpipe/common/device.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/core/message.h>\n\nnamespace tensorpipe {\n\nstruct SpontaneousConnection {\n  std::string contextName;\n  NOP_STRUCTURE(SpontaneousConnection, contextName);\n};\n\nstruct RequestedConnection {\n  uint64_t registrationId;\n  NOP_STRUCTURE(RequestedConnection, registrationId);\n};\n\nNOP_EXTERNAL_STRUCTURE(Device, type, index);\n\nstruct Brochure {\n  std::unordered_map<std::string, std::string> transportDomainDescriptors;\n  std::unordered_map<std::string, std::unordered_map<Device, std::string>>\n      channelDeviceDescriptors;\n  NOP_STRUCTURE(Brochure, transportDomainDescriptors, channelDeviceDescriptors);\n};\n\nstruct BrochureAnswer {\n  std::string transport;\n  std::string address;\n  std::unordered_map<uint64_t, uint64_t> transportRegistrationIds;\n  std::string transportDomainDescriptor;\n  std::unordered_map<std::string, std::vector<uint64_t>> channelRegistrationIds;\n  std::unordered_map<std::string, std::unordered_map<Device, std::string>>\n      channelDeviceDescriptors;\n  std::unordered_map<std::pair<Device, Device>, std::string>\n      channelForDevicePair;\n  NOP_STRUCTURE(\n      BrochureAnswer,\n      transport,\n      address,\n      transportRegistrationIds,\n      transportDomainDescriptor,\n      channelRegistrationIds,\n      channelDeviceDescriptors,\n      channelForDevicePair);\n};\n\nNOP_EXTERNAL_STRUCTURE(Descriptor::Payload, length, metadata);\nNOP_EXTERNAL_STRUCTURE(\n    Descriptor::Tensor,\n 
   length,\n    sourceDevice,\n    targetDevice,\n    metadata);\nNOP_EXTERNAL_STRUCTURE(Descriptor, metadata, payloads, tensors);\n\nstruct DescriptorReply {\n  std::vector<Device> targetDevices;\n  NOP_STRUCTURE(DescriptorReply, targetDevices);\n};\n\nusing Packet = nop::Variant<SpontaneousConnection, RequestedConnection>;\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/pipe.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/core/pipe.h>\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <tensorpipe/core/pipe_impl.h>\n\nnamespace tensorpipe {\n\nPipe::Pipe(\n    ConstructorToken /* unused */,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::string remoteName,\n    const std::string& url)\n    : impl_(std::make_shared<PipeImpl>(\n          std::move(context),\n          std::move(id),\n          std::move(remoteName),\n          url)) {\n  impl_->init();\n}\n\nPipe::Pipe(ConstructorToken /* unused */, std::shared_ptr<PipeImpl> impl)\n    : impl_(std::move(impl)) {}\n\nconst std::string& Pipe::getRemoteName() {\n  return impl_->getRemoteName();\n}\n\nPipe::~Pipe() {\n  close();\n}\n\nvoid Pipe::close() {\n  impl_->close();\n}\n\nvoid Pipe::readDescriptor(read_descriptor_callback_fn fn) {\n  impl_->readDescriptor(std::move(fn));\n}\n\nvoid Pipe::read(Allocation allocation, read_callback_fn fn) {\n  impl_->read(std::move(allocation), std::move(fn));\n}\n\nvoid Pipe::write(Message message, write_callback_fn fn) {\n  impl_->write(std::move(message), std::move(fn));\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/pipe.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <string>\n\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/core/message.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\n\nclass ContextImpl;\nclass ListenerImpl;\nclass PipeImpl;\n\n// The pipe.\n//\n// Pipes represent a set of connections between a pair of processes.\n// Unlike POSIX pipes, they are message oriented instead of byte\n// oriented. Messages that are sent through the pipe may use whatever\n// channels are at their disposal to make it happen. If the pair of\n// processes happen to be colocated on the same machine, they may\n// leverage a region of shared memory to communicate the primary\n// buffer of a message. Secondary buffers may use shared memory as\n// well, if they're located in CPU memory, or use a CUDA device to\n// device copy if they're located in NVIDIA GPU memory. If the pair is\n// located across the world, they may simply use a set of TCP\n// connections to communicate.\n//\nclass Pipe final {\n  // Use the passkey idiom to allow make_shared to call what should be a private\n  // constructor. 
See https://abseil.io/tips/134 for more information.\n  struct ConstructorToken {};\n\n public:\n  //\n  // Initialization\n  //\n\n  Pipe(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::string remoteName,\n      const std::string& url);\n\n  Pipe(ConstructorToken token, std::shared_ptr<PipeImpl> impl);\n\n  //\n  // Entry points for user code\n  //\n\n  using read_descriptor_callback_fn =\n      std::function<void(const Error&, Descriptor)>;\n\n  void readDescriptor(read_descriptor_callback_fn fn);\n\n  using read_callback_fn = std::function<void(const Error&)>;\n\n  void read(Allocation allocation, read_callback_fn fn);\n\n  using write_callback_fn = std::function<void(const Error&)>;\n\n  void write(Message message, write_callback_fn fn);\n\n  // Retrieve the user-defined name that was given to the constructor of the\n  // context on the remote side, if any (if not, this will be the empty string).\n  // This is intended to help in logging and debugging only.\n  const std::string& getRemoteName();\n\n  // Put the pipe in a terminal state, aborting its pending operations and\n  // rejecting future ones, and release its resrouces. This may be carried out\n  // asynchronously, in background.\n  void close();\n\n  ~Pipe();\n\n private:\n  // Using a shared_ptr allows us to detach the lifetime of the implementation\n  // from the public object's one and perform the destruction asynchronously.\n  const std::shared_ptr<PipeImpl> impl_;\n\n  // Allow context to access constructor token.\n  friend ContextImpl;\n  // Allow listener to access constructor token.\n  friend ListenerImpl;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/pipe_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/core/pipe_impl.h>\n\n#include <map>\n#include <memory>\n#include <tuple>\n#include <unordered_map>\n#include <utility>\n\n#include <tensorpipe/common/address.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/core/context_impl.h>\n#include <tensorpipe/core/error.h>\n#include <tensorpipe/core/listener.h>\n#include <tensorpipe/core/listener_impl.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace tensorpipe {\n\nnamespace {\n\nvoid parseDescriptorReplyOfMessage(\n    WriteOperation& op,\n    DescriptorReply nopDescriptorReply) {\n  const int numTensors = op.message.tensors.size();\n  size_t targetDeviceIdx = 0;\n  for (size_t tensorIdx = 0; tensorIdx < numTensors; ++tensorIdx) {\n    const Message::Tensor& tensor = op.message.tensors[tensorIdx];\n    WriteOperation::Tensor& tensorBeingSent = op.tensors[tensorIdx];\n    if (!tensor.targetDevice.has_value()) {\n      tensorBeingSent.targetDevice =\n          std::move(nopDescriptorReply.targetDevices[targetDeviceIdx++]);\n    }\n  }\n  TP_DCHECK_EQ(targetDeviceIdx, nopDescriptorReply.targetDevices.size());\n}\n\n// Raise an error if the number of payloads and tensors in the allocation do not\n// match the ones that are expected by the ReadOperation. 
Also checks that\n// tensors are allocated on the correct devices.\nvoid checkAllocationCompatibility(\n    const Descriptor& descriptor,\n    const Allocation& allocation) {\n  size_t numPayloads = allocation.payloads.size();\n  TP_THROW_ASSERT_IF(numPayloads != descriptor.payloads.size());\n\n  size_t numTensors = allocation.tensors.size();\n  TP_THROW_ASSERT_IF(numTensors != descriptor.tensors.size());\n  for (size_t tensorIdx = 0; tensorIdx < numTensors; tensorIdx++) {\n    const Allocation::Tensor& tensor = allocation.tensors[tensorIdx];\n    const Descriptor::Tensor& tensorDescriptor = descriptor.tensors[tensorIdx];\n    if (tensorDescriptor.targetDevice.has_value()) {\n      TP_THROW_ASSERT_IF(\n          !(tensor.buffer.device() == tensorDescriptor.targetDevice.value()));\n    }\n  }\n}\n\n// Produce a nop object containing a message descriptor using the information\n// contained in the WriteOperation: number and sizes of payloads and tensors,\n// tensor descriptors, ...\nstd::shared_ptr<NopHolder<Descriptor>> makeDescriptorForMessage(\n    const WriteOperation& op) {\n  auto nopHolderOut = std::make_shared<NopHolder<Descriptor>>();\n  Descriptor& nopDescriptor = nopHolderOut->getObject();\n\n  nopDescriptor.metadata = op.message.metadata;\n\n  for (int payloadIdx = 0; payloadIdx < op.message.payloads.size();\n       ++payloadIdx) {\n    const Message::Payload& payload = op.message.payloads[payloadIdx];\n    nopDescriptor.payloads.emplace_back();\n    Descriptor::Payload& nopPayloadDescriptor = nopDescriptor.payloads.back();\n    nopPayloadDescriptor.length = payload.length;\n    nopPayloadDescriptor.metadata = payload.metadata;\n  }\n\n  TP_DCHECK_EQ(op.message.tensors.size(), op.tensors.size());\n  for (int tensorIdx = 0; tensorIdx < op.tensors.size(); ++tensorIdx) {\n    const Message::Tensor& tensor = op.message.tensors[tensorIdx];\n    nopDescriptor.tensors.emplace_back();\n    Descriptor::Tensor& nopTensorDescriptor = nopDescriptor.tensors.back();\n   
 nopTensorDescriptor.metadata = tensor.metadata;\n    nopTensorDescriptor.sourceDevice = tensor.buffer.device();\n    if (tensor.targetDevice.has_value()) {\n      nopTensorDescriptor.targetDevice = tensor.targetDevice.value();\n    }\n    nopTensorDescriptor.length = tensor.length;\n  }\n\n  return nopHolderOut;\n}\n\nstd::shared_ptr<NopHolder<DescriptorReply>> makeDescriptorReplyForMessage(\n    const ReadOperation& op) {\n  auto nopHolderOut = std::make_shared<NopHolder<DescriptorReply>>();\n  DescriptorReply& nopDescriptorReply = nopHolderOut->getObject();\n\n  for (size_t tensorIdx = 0; tensorIdx < op.descriptor.tensors.size();\n       ++tensorIdx) {\n    if (!op.descriptor.tensors[tensorIdx].targetDevice.has_value()) {\n      const Allocation::Tensor& tensor = op.allocation.tensors[tensorIdx];\n      nopDescriptorReply.targetDevices.push_back(tensor.buffer.device());\n    }\n  }\n\n  return nopHolderOut;\n}\n\nstruct SelectedTransport {\n  std::string name;\n  std::string address;\n  std::string domainDescriptor;\n};\n\nSelectedTransport selectTransport(\n    const ContextImpl::TOrderedTransports& orderedTransports,\n    const std::unordered_map<std::string, std::string>& remoteDomainDescriptors,\n    const std::map<std::string, std::string>& addresses) {\n  for (const auto& transportContextIter : orderedTransports) {\n    const std::string& transportName = std::get<0>(transportContextIter.second);\n    const transport::Context& transportContext =\n        *(std::get<1>(transportContextIter.second));\n\n    // This pipe's listener might not have an address for that transport.\n    const auto addressIter = addresses.find(transportName);\n    if (addressIter == addresses.cend()) {\n      continue;\n    }\n    const auto& address = addressIter->second;\n\n    const auto remoteDomainDescriptorsIter =\n        remoteDomainDescriptors.find(transportName);\n    if (remoteDomainDescriptorsIter == remoteDomainDescriptors.cend()) {\n      continue;\n    }\n    const 
std::string& remoteDomainDescriptor =\n        remoteDomainDescriptorsIter->second;\n    if (!transportContext.canCommunicateWithRemote(remoteDomainDescriptor)) {\n      continue;\n    }\n\n    return {transportName, address, transportContext.domainDescriptor()};\n  }\n\n  TP_THROW_ASSERT() << \"Could not find a viable transport\";\n  // Returning dummy value to silence compiler warning.\n  return {};\n}\n\nstruct SelectedChannels {\n  std::unordered_map<std::string, std::unordered_map<Device, std::string>>\n      descriptorsMap;\n  std::unordered_map<std::pair<Device, Device>, std::string>\n      channelForDevicePair;\n};\n\nSelectedChannels selectChannels(\n    const ContextImpl::TOrderedChannels& orderedChannels,\n    const std::unordered_map<\n        std::string,\n        std::unordered_map<Device, std::string>>& remoteDescriptorsMap) {\n  SelectedChannels result;\n\n  for (const auto& channelIter : orderedChannels) {\n    const std::string& channelName = std::get<0>(channelIter.second);\n    const channel::Context& channelContext = *std::get<1>(channelIter.second);\n\n    const auto& remoteDescriptorsMapIter =\n        remoteDescriptorsMap.find(channelName);\n    if (remoteDescriptorsMapIter == remoteDescriptorsMap.end()) {\n      continue;\n    }\n\n    const std::unordered_map<Device, std::string>& localDeviceDescriptors =\n        channelContext.deviceDescriptors();\n    const std::unordered_map<Device, std::string>& remoteDeviceDescriptors =\n        remoteDescriptorsMapIter->second;\n\n    bool selected = false;\n    for (const auto& localDescIter : localDeviceDescriptors) {\n      const Device& localDevice = localDescIter.first;\n      const std::string& localDeviceDescriptor = localDescIter.second;\n      for (const auto& remoteDescIter : remoteDeviceDescriptors) {\n        const Device& remoteDevice = remoteDescIter.first;\n        const std::string& remoteDeviceDescriptor = remoteDescIter.second;\n\n        if 
(!channelContext.canCommunicateWithRemote(\n                localDeviceDescriptor, remoteDeviceDescriptor)) {\n          continue;\n        }\n\n        if (result.channelForDevicePair.count({localDevice, remoteDevice}) !=\n            0) {\n          // A channel with higher priority has already been selected for this\n          // device pair.\n          continue;\n        }\n\n        selected = true;\n        result.channelForDevicePair[{localDevice, remoteDevice}] = channelName;\n      }\n    }\n\n    if (selected) {\n      result.descriptorsMap[channelName] = localDeviceDescriptors;\n    }\n  }\n\n  return result;\n}\n\n} // namespace\n\n//\n// Initialization\n//\n\nPipeImpl::PipeImpl(\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::string remoteName,\n    const std::string& url)\n    : state_(CLIENT_ABOUT_TO_SEND_HELLO_AND_BROCHURE),\n      context_(std::move(context)),\n      id_(std::move(id)),\n      remoteName_(std::move(remoteName)) {\n  std::string address;\n  std::tie(transport_, address) = splitSchemeOfURL(url);\n  descriptorConnection_ =\n      context_->getTransport(transport_)->connect(std::move(address));\n  descriptorConnection_->setId(id_ + \".d.tr_\" + transport_);\n}\n\nPipeImpl::PipeImpl(\n    std::shared_ptr<ContextImpl> context,\n    std::shared_ptr<ListenerImpl> listener,\n    std::string id,\n    std::string remoteName,\n    std::string transport,\n    std::shared_ptr<transport::Connection> connection)\n    : state_(SERVER_WAITING_FOR_BROCHURE),\n      context_(std::move(context)),\n      listener_(std::move(listener)),\n      id_(std::move(id)),\n      remoteName_(std::move(remoteName)),\n      transport_(std::move(transport)),\n      descriptorConnection_(std::move(connection)) {\n  descriptorConnection_->setId(id_ + \".d.tr_\" + transport_);\n}\n\nvoid PipeImpl::init() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->initFromLoop(); });\n}\n\nvoid PipeImpl::initFromLoop() {\n  
TP_DCHECK(context_->inLoop());\n\n  if (context_->closed()) {\n    // Set the error without calling setError because we do not want to invoke\n    // handleError as it would find itself in a weird state (since the rest of\n    // initFromLoop wouldn't have been called).\n    error_ = TP_CREATE_ERROR(PipeClosedError);\n    TP_VLOG(1) << \"Pipe \" << id_ << \" is closing (without initing)\";\n    return;\n  }\n\n  context_->enroll(*this);\n\n  if (state_ == CLIENT_ABOUT_TO_SEND_HELLO_AND_BROCHURE) {\n    auto nopHolderOut = std::make_shared<NopHolder<Packet>>();\n    Packet& nopPacketOut = nopHolderOut->getObject();\n    nopPacketOut.Become(nopPacketOut.index_of<SpontaneousConnection>());\n    SpontaneousConnection& nopSpontaneousConnection =\n        *nopPacketOut.get<SpontaneousConnection>();\n    nopSpontaneousConnection.contextName = context_->getName();\n    TP_VLOG(3) << \"Pipe \" << id_\n               << \" is writing nop object (spontaneous connection)\";\n    descriptorConnection_->write(\n        *nopHolderOut, callbackWrapper_([nopHolderOut](PipeImpl& impl) {\n          TP_VLOG(3) << \"Pipe \" << impl.id_\n                     << \" done writing nop object (spontaneous connection)\";\n        }));\n\n    auto nopHolderOut2 = std::make_shared<NopHolder<Brochure>>();\n    Brochure& nopBrochure = nopHolderOut2->getObject();\n    for (const auto& transportContextIter : context_->getOrderedTransports()) {\n      const std::string& transportName =\n          std::get<0>(transportContextIter.second);\n      const transport::Context& transportContext =\n          *(std::get<1>(transportContextIter.second));\n      nopBrochure.transportDomainDescriptors[transportName] =\n          transportContext.domainDescriptor();\n    }\n    for (const auto& channelContextIter : context_->getOrderedChannels()) {\n      const std::string& channelName = std::get<0>(channelContextIter.second);\n      const channel::Context& channelContext =\n          
*(std::get<1>(channelContextIter.second));\n      nopBrochure.channelDeviceDescriptors[channelName] =\n          channelContext.deviceDescriptors();\n    }\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is writing nop object (brochure)\";\n    descriptorConnection_->write(\n        *nopHolderOut2, callbackWrapper_([nopHolderOut2](PipeImpl& impl) {\n          TP_VLOG(3) << \"Pipe \" << impl.id_\n                     << \" done writing nop object (brochure)\";\n        }));\n    state_ = CLIENT_WAITING_FOR_BROCHURE_ANSWER;\n    auto nopHolderIn = std::make_shared<NopHolder<BrochureAnswer>>();\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is reading nop object (brochure answer)\";\n    descriptorConnection_->read(\n        *nopHolderIn, callbackWrapper_([nopHolderIn](PipeImpl& impl) {\n          TP_VLOG(3) << \"Pipe \" << impl.id_\n                     << \" done reading nop object (brochure answer)\";\n          if (!impl.error_) {\n            impl.onReadWhileClientWaitingForBrochureAnswer(\n                nopHolderIn->getObject());\n          }\n        }));\n  }\n  if (state_ == SERVER_WAITING_FOR_BROCHURE) {\n    auto nopHolderIn = std::make_shared<NopHolder<Brochure>>();\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is reading nop object (brochure)\";\n    descriptorConnection_->read(\n        *nopHolderIn, callbackWrapper_([nopHolderIn](PipeImpl& impl) {\n          TP_VLOG(3) << \"Pipe \" << impl.id_\n                     << \" done reading nop object (brochure)\";\n          if (!impl.error_) {\n            impl.onReadWhileServerWaitingForBrochure(nopHolderIn->getObject());\n          }\n        }));\n  }\n}\n\nconst std::string& PipeImpl::getRemoteName() {\n  return remoteName_;\n}\n\nvoid PipeImpl::close() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->closeFromLoop(); });\n}\n\nvoid PipeImpl::closeFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(1) << \"Pipe \" << id_ << \" is closing\";\n  
setError(TP_CREATE_ERROR(PipeClosedError));\n}\n\n//\n// Entry points for user code\n//\n\nvoid PipeImpl::readDescriptor(read_descriptor_callback_fn fn) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, fn{std::move(fn)}]() mutable {\n        impl->readDescriptorFromLoop(std::move(fn));\n      });\n}\n\nvoid PipeImpl::readDescriptorFromLoop(read_descriptor_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOpIter opIter = readOps_.emplaceBack(nextMessageBeingRead_++);\n  ReadOperation& op = *opIter;\n\n  TP_VLOG(1) << \"Pipe \" << id_ << \" received a readDescriptor request (#\"\n             << op.sequenceNumber << \")\";\n\n  fn = [this, sequenceNumber{op.sequenceNumber}, fn{std::move(fn)}](\n           const Error& error, Descriptor descriptor) {\n    TP_DCHECK_EQ(sequenceNumber, nextReadDescriptorCallbackToCall_++);\n    TP_VLOG(1) << \"Pipe \" << id_ << \" is calling a readDescriptor callback (#\"\n               << sequenceNumber << \")\";\n    fn(error, std::move(descriptor));\n    TP_VLOG(1) << \"Pipe \" << id_ << \" done calling a readDescriptor callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  op.readDescriptorCallback = std::move(fn);\n\n  readOps_.advanceOperation(opIter);\n}\n\nvoid PipeImpl::read(Allocation allocation, read_callback_fn fn) {\n  context_->deferToLoop([impl{this->shared_from_this()},\n                         allocation{std::move(allocation)},\n                         fn{std::move(fn)}]() mutable {\n    impl->readFromLoop(std::move(allocation), std::move(fn));\n  });\n}\n\nvoid PipeImpl::readFromLoop(Allocation allocation, read_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  // This is such a bad logical error on the user's side that it doesn't deserve\n  // to pass through the channel for \"expected errors\" (i.e., the callback).\n  // This check fails when there is no message for which we are expecting an\n  // allocation.\n  
TP_THROW_ASSERT_IF(!nextMessageGettingAllocation_.has_value());\n  ReadOpIter opIter = nextMessageGettingAllocation_.value();\n  ReadOperation& op = *opIter;\n  nextMessageGettingAllocation_.reset();\n\n  checkAllocationCompatibility(op.descriptor, allocation);\n\n  fn = [this, sequenceNumber{op.sequenceNumber}, fn{std::move(fn)}](\n           const Error& error) {\n    TP_DCHECK_EQ(sequenceNumber, nextReadCallbackToCall_++);\n    TP_VLOG(1) << \"Pipe \" << id_ << \" is calling a read callback (#\"\n               << sequenceNumber << \")\";\n    fn(error);\n    TP_VLOG(1) << \"Pipe \" << id_ << \" done calling a read callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  op.allocation = std::move(allocation);\n  op.readCallback = std::move(fn);\n  op.doneGettingAllocation = true;\n\n  TP_VLOG(1) << \"Pipe \" << id_ << \" received a read request (#\"\n             << op.sequenceNumber << \", containing \"\n             << op.allocation.payloads.size() << \" payloads and \"\n             << op.allocation.tensors.size() << \" tensors)\";\n\n  readOps_.advanceOperation(opIter);\n}\n\nvoid PipeImpl::readPayloadsOfMessage(ReadOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOperation& op = *opIter;\n\n  TP_VLOG(2) << \"Pipe \" << id_ << \" is reading payloads of message #\"\n             << op.sequenceNumber;\n\n  TP_DCHECK_EQ(connectionState_, AWAITING_PAYLOADS);\n  TP_DCHECK_EQ(messageBeingReadFromConnection_, op.sequenceNumber);\n  for (size_t payloadIdx = 0; payloadIdx < op.allocation.payloads.size();\n       payloadIdx++) {\n    Allocation::Payload& payload = op.allocation.payloads[payloadIdx];\n    Descriptor::Payload& payloadDescriptor = op.descriptor.payloads[payloadIdx];\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is reading payload #\" << op.sequenceNumber\n               << \".\" << payloadIdx;\n    descriptorConnection_->read(\n        payload.data,\n        payloadDescriptor.length,\n        callbackWrapper_(\n            [opIter, 
payloadIdx](\n                PipeImpl& impl, const void* /* unused */, size_t /* unused */) {\n              TP_VLOG(3) << \"Pipe \" << impl.id_ << \" done reading payload #\"\n                         << opIter->sequenceNumber << \".\" << payloadIdx;\n              opIter->numPayloadsBeingRead--;\n              impl.readOps_.advanceOperation(opIter);\n            }));\n    ++op.numPayloadsBeingRead;\n  }\n  connectionState_ = AWAITING_DESCRIPTOR;\n  ++messageBeingReadFromConnection_;\n}\n\nvoid PipeImpl::receiveTensorsOfMessage(ReadOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOperation& op = *opIter;\n\n  TP_VLOG(2) << \"Pipe \" << id_ << \" is receiving tensors of message #\"\n             << op.sequenceNumber;\n\n  TP_DCHECK_EQ(op.descriptor.tensors.size(), op.allocation.tensors.size());\n  for (size_t tensorIdx = 0; tensorIdx < op.descriptor.tensors.size();\n       ++tensorIdx) {\n    Allocation::Tensor& tensor = op.allocation.tensors[tensorIdx];\n    const Descriptor::Tensor& tensorDescriptor =\n        op.descriptor.tensors[tensorIdx];\n\n    const Device& localDevice = tensor.buffer.device();\n    const Device& remoteDevice = tensorDescriptor.sourceDevice;\n    const auto& channelIter =\n        channelForDevicePair_.find({localDevice, remoteDevice});\n    TP_THROW_ASSERT_IF(channelIter == channelForDevicePair_.end())\n        << \"Could not find suitable channel for sending from local device \"\n        << localDevice.toString() << \" to remote device \"\n        << remoteDevice.toString();\n\n    const std::string& channelName = channelIter->second;\n    channel::Channel& channel = *channels_.at(channelName);\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is receiving tensor #\"\n               << op.sequenceNumber << \".\" << tensorIdx;\n\n    channel.recv(\n        tensor.buffer,\n        tensorDescriptor.length,\n        callbackWrapper_([opIter, tensorIdx](PipeImpl& impl) {\n          TP_VLOG(3) << \"Pipe \" << impl.id_ << \" done receiving 
tensor #\"\n                     << opIter->sequenceNumber << \".\" << tensorIdx;\n          opIter->numTensorsBeingReceived--;\n          impl.readOps_.advanceOperation(opIter);\n        }));\n    ++op.numTensorsBeingReceived;\n  }\n}\n\nvoid PipeImpl::writeDescriptorReplyOfMessage(ReadOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOperation& op = *opIter;\n\n  TP_DCHECK(op.hasMissingTargetDevices);\n\n  std::shared_ptr<NopHolder<DescriptorReply>> holder =\n      makeDescriptorReplyForMessage(op);\n\n  TP_VLOG(3) << \"Pipe \" << id_\n             << \" is writing nop object (message descriptor reply #\"\n             << op.sequenceNumber << \")\";\n  descriptorReplyConnection_->write(\n      *holder,\n      callbackWrapper_(\n          [sequenceNumber{op.sequenceNumber}, holder](PipeImpl& impl) {\n            TP_VLOG(3) << \"Pipe \" << impl.id_\n                       << \" done writing nop object (message descriptor reply #\"\n                       << sequenceNumber << \")\";\n          }));\n}\n\nvoid PipeImpl::write(Message message, write_callback_fn fn) {\n  context_->deferToLoop([impl{this->shared_from_this()},\n                         message{std::move(message)},\n                         fn{std::move(fn)}]() mutable {\n    impl->writeFromLoop(std::move(message), std::move(fn));\n  });\n}\n\nvoid PipeImpl::writeFromLoop(Message message, write_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  WriteOpIter opIter = writeOps_.emplaceBack(nextMessageBeingWritten_++);\n  WriteOperation& op = *opIter;\n\n  TP_VLOG(1) << \"Pipe \" << id_ << \" received a write request (#\"\n             << op.sequenceNumber << \", containing \" << message.payloads.size()\n             << \" payloads and \" << message.tensors.size() << \" tensors)\";\n\n  fn = [this, sequenceNumber{op.sequenceNumber}, fn{std::move(fn)}](\n           const Error& error) {\n    TP_DCHECK_EQ(sequenceNumber, nextWriteCallbackToCall_++);\n    TP_VLOG(1) << \"Pipe \" << id_ << \" is 
calling a write callback (#\"\n               << sequenceNumber << \")\";\n    fn(error);\n    TP_VLOG(1) << \"Pipe \" << id_ << \" done calling a write callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  size_t numTensors = message.tensors.size();\n  op.tensors.resize(numTensors);\n  for (size_t tensorIdx = 0; tensorIdx < numTensors; ++tensorIdx) {\n    const Message::Tensor& tensor = message.tensors[tensorIdx];\n    WriteOperation::Tensor& tensorBeingSent = op.tensors[tensorIdx];\n    tensorBeingSent.sourceDevice = tensor.buffer.device();\n    if (tensor.targetDevice.has_value()) {\n      tensorBeingSent.targetDevice = *tensor.targetDevice;\n    } else {\n      op.hasMissingTargetDevices = true;\n    }\n  }\n\n  op.message = std::move(message);\n  op.writeCallback = std::move(fn);\n\n  writeOps_.advanceOperation(opIter);\n}\n\n//\n// Helpers to schedule our callbacks into user code\n//\n\nvoid PipeImpl::callReadDescriptorCallback(ReadOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOperation& op = *opIter;\n\n  op.readDescriptorCallback(error_, op.descriptor);\n  // Reset callback to release the resources it was holding.\n  op.readDescriptorCallback = nullptr;\n}\n\nvoid PipeImpl::callReadCallback(ReadOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOperation& op = *opIter;\n\n  op.readCallback(error_);\n  // Reset callback to release the resources it was holding.\n  op.readCallback = nullptr;\n}\n\nvoid PipeImpl::callWriteCallback(WriteOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  WriteOperation& op = *opIter;\n\n  op.writeCallback(error_);\n  // Reset callback to release the resources it was holding.\n  op.writeCallback = nullptr;\n}\n\n//\n// Error handling\n//\n\nvoid PipeImpl::setError(Error error) {\n  // Don't overwrite an error that's already set.\n  if (error_ || !error) {\n    return;\n  }\n\n  error_ = std::move(error);\n\n  handleError();\n}\n\nvoid PipeImpl::handleError() {\n  
TP_DCHECK(context_->inLoop());\n  TP_VLOG(2) << \"Pipe \" << id_ << \" is handling error \" << error_.what();\n\n  descriptorConnection_->close();\n\n  if (descriptorReplyConnection_) {\n    descriptorReplyConnection_->close();\n  }\n\n  for (auto& channelIter : channels_) {\n    channelIter.second->close();\n  }\n\n  for (const auto& tokenIter : registrationIds_) {\n    listener_->unregisterConnectionRequest(tokenIter.second);\n  }\n  registrationIds_.clear();\n\n  for (const auto& iter : channelRegistrationIds_) {\n    for (const auto& token : iter.second) {\n      listener_->unregisterConnectionRequest(token);\n    }\n  }\n  channelRegistrationIds_.clear();\n  channelReceivedConnections_.clear();\n\n  readOps_.advanceAllOperations();\n  writeOps_.advanceAllOperations();\n\n  context_->unenroll(*this);\n}\n\n//\n// Everything else\n//\n\nvoid PipeImpl::advanceReadOperation(\n    ReadOpIter opIter,\n    ReadOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOperation& op = *opIter;\n\n  // Needs to go after previous op to ensure ordering of callback invocations.\n  readOps_.attemptTransition(\n      opIter,\n      /*from=*/ReadOperation::UNINITIALIZED,\n      /*to=*/ReadOperation::ASKING_FOR_ALLOCATION,\n      /*cond=*/error_ && prevOpState >= ReadOperation::ASKING_FOR_ALLOCATION,\n      /*actions=*/{&PipeImpl::callReadDescriptorCallback});\n\n  // The ordering on the \"wire\" (the primary connection) is descriptor of op N,\n  // then payloads of op N, then descriptor of op N+1. 
Hence this transition\n  // must happen after the previous op scheduled its payload read, not just its\n  // descriptor read.\n  readOps_.attemptTransition(\n      opIter,\n      /*from=*/ReadOperation::UNINITIALIZED,\n      /*to=*/ReadOperation::READING_DESCRIPTOR,\n      /*cond=*/!error_ && state_ == ESTABLISHED &&\n          prevOpState >= ReadOperation::READING_PAYLOADS_AND_RECEIVING_TENSORS,\n      /*actions=*/{&PipeImpl::readDescriptorOfMessage});\n\n  // Needs to go after previous op to ensure ordering of callback invocations.\n  readOps_.attemptTransition(\n      opIter,\n      /*from=*/ReadOperation::READING_DESCRIPTOR,\n      /*to=*/ReadOperation::ASKING_FOR_ALLOCATION,\n      /*cond=*/op.doneReadingDescriptor &&\n          prevOpState >= ReadOperation::ASKING_FOR_ALLOCATION,\n      /*actions=*/{&PipeImpl::callReadDescriptorCallback});\n\n  // Needs to wait for previous op to have _received_ the read call, as we can\n  // only have exactly one operation at a time for which we expect a read call.\n  readOps_.attemptTransition(\n      opIter,\n      /*from=*/ReadOperation::ASKING_FOR_ALLOCATION,\n      /*to=*/ReadOperation::ASKING_FOR_ALLOCATION_FIRST_IN_LINE,\n      /*cond=*/op.doneReadingDescriptor &&\n          prevOpState >= ReadOperation::READING_PAYLOADS_AND_RECEIVING_TENSORS,\n      /*actions=*/{&PipeImpl::expectReadCall});\n\n  // Needs to go after previous op to ensure ordering of callback invocations.\n  readOps_.attemptTransition(\n      opIter,\n      /*from=*/ReadOperation::ASKING_FOR_ALLOCATION_FIRST_IN_LINE,\n      /*to=*/ReadOperation::FINISHED,\n      /*cond=*/error_ && op.doneGettingAllocation &&\n          prevOpState >= ReadOperation::FINISHED,\n      /*actions=*/{&PipeImpl::callReadCallback});\n\n  // No need to order this with the previous operation, since all it needs is\n  // to come after this own op's descriptor read.\n  // This transition shortcuts writing the descriptor reply when all target\n  // devices were provided by the 
sender.\n  readOps_.attemptTransition(\n      opIter,\n      /*from=*/ReadOperation::ASKING_FOR_ALLOCATION_FIRST_IN_LINE,\n      /*to=*/ReadOperation::READING_PAYLOADS_AND_RECEIVING_TENSORS,\n      /*cond=*/!error_ && op.doneGettingAllocation &&\n          !op.hasMissingTargetDevices,\n      /*actions=*/\n      {&PipeImpl::readPayloadsOfMessage, &PipeImpl::receiveTensorsOfMessage});\n\n  // No need to order this with the previous operation, since all it needs is\n  // to come after this own op's descriptor read.\n  readOps_.attemptTransition(\n      opIter,\n      /*from=*/ReadOperation::ASKING_FOR_ALLOCATION_FIRST_IN_LINE,\n      /*to=*/ReadOperation::READING_PAYLOADS_AND_RECEIVING_TENSORS,\n      /*cond=*/!error_ && op.doneGettingAllocation &&\n          op.hasMissingTargetDevices,\n      /*actions=*/\n      {&PipeImpl::readPayloadsOfMessage,\n       &PipeImpl::writeDescriptorReplyOfMessage,\n       &PipeImpl::receiveTensorsOfMessage});\n\n  // Needs to go after previous op to ensure ordering of callback invocations.\n  readOps_.attemptTransition(\n      opIter,\n      /*from=*/ReadOperation::READING_PAYLOADS_AND_RECEIVING_TENSORS,\n      /*to=*/ReadOperation::FINISHED,\n      /*cond=*/op.numPayloadsBeingRead == 0 &&\n          op.numTensorsBeingReceived == 0 &&\n          prevOpState >= ReadOperation::FINISHED,\n      /*actions=*/{&PipeImpl::callReadCallback});\n}\n\nvoid PipeImpl::advanceWriteOperation(\n    WriteOpIter opIter,\n    WriteOperation::State prevOpState) {\n  TP_DCHECK(context_->inLoop());\n\n  WriteOperation& op = *opIter;\n\n  // Needs to go after previous op to ensure ordering of callback invocations.\n  writeOps_.attemptTransition(\n      opIter,\n      /*from=*/WriteOperation::UNINITIALIZED,\n      /*to=*/WriteOperation::FINISHED,\n      /*cond=*/error_ && prevOpState >= WriteOperation::FINISHED,\n      /*actions=*/{&PipeImpl::callWriteCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of 
write calls on the connection and send calls on the channels.\n  // This transition shortcuts reading the target devices when they were all\n  // provided by the user.\n  writeOps_.attemptTransition(\n      opIter,\n      /*from=*/WriteOperation::UNINITIALIZED,\n      /*to=*/WriteOperation::WRITING_PAYLOADS_AND_SENDING_TENSORS,\n      /*cond=*/!error_ && state_ == ESTABLISHED &&\n          !op.hasMissingTargetDevices &&\n          prevOpState >= WriteOperation::WRITING_PAYLOADS_AND_SENDING_TENSORS,\n      /*actions=*/\n      {&PipeImpl::writeDescriptorOfMessage,\n       &PipeImpl::writePayloadsOfMessage,\n       &PipeImpl::sendTensorsOfMessage});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of write calls on the descriptor connection and read calls on the\n  // descriptor reply connection.\n  writeOps_.attemptTransition(\n      opIter,\n      /*from=*/WriteOperation::UNINITIALIZED,\n      /*to=*/WriteOperation::WRITING_PAYLOADS_AND_READING_TARGET_DEVICES,\n      /*cond=*/!error_ && state_ == ESTABLISHED && op.hasMissingTargetDevices &&\n          prevOpState >=\n              WriteOperation::WRITING_PAYLOADS_AND_READING_TARGET_DEVICES,\n      /*actions=*/\n      {&PipeImpl::writeDescriptorOfMessage,\n       &PipeImpl::writePayloadsOfMessage,\n       &PipeImpl::readDescriptorReplyOfMessage});\n\n  // Needs to go after previous op to ensure ordering of callback invocations.\n  writeOps_.attemptTransition(\n      opIter,\n      /*from=*/WriteOperation::WRITING_PAYLOADS_AND_READING_TARGET_DEVICES,\n      /*to=*/WriteOperation::FINISHED,\n      /*cond=*/error_ && op.numPayloadsBeingWritten == 0 &&\n          op.doneReadingDescriptorReply &&\n          prevOpState >= WriteOperation::FINISHED,\n      /*actions=*/{&PipeImpl::callWriteCallback});\n\n  // Needs to go after previous op to ensure predictable and consistent ordering\n  // of send calls on channels.\n  writeOps_.attemptTransition(\n      opIter,\n      
/*from=*/WriteOperation::WRITING_PAYLOADS_AND_READING_TARGET_DEVICES,\n      /*to=*/WriteOperation::WRITING_PAYLOADS_AND_SENDING_TENSORS,\n      /*cond=*/!error_ && op.doneReadingDescriptorReply &&\n          prevOpState >= WriteOperation::WRITING_PAYLOADS_AND_SENDING_TENSORS,\n      /*actions=*/{&PipeImpl::sendTensorsOfMessage});\n\n  // Needs to go after previous op to ensure ordering of callback invocations.\n  writeOps_.attemptTransition(\n      opIter,\n      /*from=*/WriteOperation::WRITING_PAYLOADS_AND_SENDING_TENSORS,\n      /*to=*/WriteOperation::FINISHED,\n      /*cond=*/op.numPayloadsBeingWritten == 0 && op.numTensorsBeingSent == 0 &&\n          prevOpState >= WriteOperation::FINISHED,\n      /*actions=*/{&PipeImpl::callWriteCallback});\n}\n\nvoid PipeImpl::readDescriptorOfMessage(ReadOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOperation& op = *opIter;\n\n  TP_DCHECK_EQ(connectionState_, AWAITING_DESCRIPTOR);\n  TP_DCHECK_EQ(messageBeingReadFromConnection_, op.sequenceNumber);\n  auto nopHolderIn = std::make_shared<NopHolder<Descriptor>>();\n  TP_VLOG(3) << \"Pipe \" << id_ << \" is reading nop object (message descriptor #\"\n             << op.sequenceNumber << \")\";\n  descriptorConnection_->read(\n      *nopHolderIn, callbackWrapper_([opIter, nopHolderIn](PipeImpl& impl) {\n        TP_VLOG(3) << \"Pipe \" << impl.id_\n                   << \" done reading nop object (message descriptor #\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingDescriptor = true;\n        if (!impl.error_) {\n          opIter->descriptor = std::move(nopHolderIn->getObject());\n          for (const auto& tensor : opIter->descriptor.tensors) {\n            if (!tensor.targetDevice.has_value()) {\n              opIter->hasMissingTargetDevices = true;\n            }\n          }\n        }\n        impl.readOps_.advanceOperation(opIter);\n      }));\n  connectionState_ = AWAITING_PAYLOADS;\n}\n\nvoid 
PipeImpl::expectReadCall(ReadOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  ReadOperation& op = *opIter;\n\n  TP_DCHECK(!nextMessageGettingAllocation_.has_value());\n  nextMessageGettingAllocation_ = opIter;\n}\n\nvoid PipeImpl::sendTensorsOfMessage(WriteOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  WriteOperation& op = *opIter;\n\n  TP_VLOG(2) << \"Pipe \" << id_ << \" is sending tensors of message #\"\n             << op.sequenceNumber;\n\n  TP_DCHECK_EQ(op.message.tensors.size(), op.tensors.size());\n  for (size_t tensorIdx = 0; tensorIdx < op.message.tensors.size();\n       ++tensorIdx) {\n    const auto& tensor = op.message.tensors[tensorIdx];\n\n    const Device& localDevice = op.tensors[tensorIdx].sourceDevice;\n    TP_DCHECK(op.tensors[tensorIdx].targetDevice.has_value());\n    const Device& remoteDevice = *op.tensors[tensorIdx].targetDevice;\n    const auto& channelIter =\n        channelForDevicePair_.find({localDevice, remoteDevice});\n    TP_THROW_ASSERT_IF(channelIter == channelForDevicePair_.end())\n        << \"Could not find suitable channel for sending from local device \"\n        << localDevice.toString() << \" to remote device \"\n        << remoteDevice.toString();\n    const std::string& channelName = channelIter->second;\n\n    channel::Channel& channel = *channels_[channelName];\n\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is sending tensor #\" << op.sequenceNumber\n               << \".\" << tensorIdx;\n\n    channel.send(\n        tensor.buffer,\n        tensor.length,\n        callbackWrapper_([opIter, tensorIdx](PipeImpl& impl) {\n          TP_VLOG(3) << \"Pipe \" << impl.id_ << \" done sending tensor #\"\n                     << opIter->sequenceNumber << \".\" << tensorIdx;\n          opIter->numTensorsBeingSent--;\n          impl.writeOps_.advanceOperation(opIter);\n        }));\n\n    ++op.numTensorsBeingSent;\n  }\n}\n\nvoid PipeImpl::writeDescriptorOfMessage(WriteOpIter opIter) {\n  
TP_DCHECK(context_->inLoop());\n\n  WriteOperation& op = *opIter;\n\n  std::shared_ptr<NopHolder<Descriptor>> holder = makeDescriptorForMessage(op);\n\n  TP_VLOG(3) << \"Pipe \" << id_ << \" is writing nop object (message descriptor #\"\n             << op.sequenceNumber << \")\";\n  descriptorConnection_->write(\n      *holder,\n      callbackWrapper_(\n          [sequenceNumber{op.sequenceNumber}, holder](PipeImpl& impl) {\n            TP_VLOG(3) << \"Pipe \" << impl.id_\n                       << \" done writing nop object (message descriptor #\"\n                       << sequenceNumber << \")\";\n          }));\n}\n\nvoid PipeImpl::writePayloadsOfMessage(WriteOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  WriteOperation& op = *opIter;\n\n  TP_VLOG(2) << \"Pipe \" << id_ << \" is writing payloads of message #\"\n             << op.sequenceNumber;\n\n  for (size_t payloadIdx = 0; payloadIdx < op.message.payloads.size();\n       payloadIdx++) {\n    Message::Payload& payload = op.message.payloads[payloadIdx];\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is writing payload #\" << op.sequenceNumber\n               << \".\" << payloadIdx;\n    descriptorConnection_->write(\n        payload.data,\n        payload.length,\n        callbackWrapper_([opIter, payloadIdx](PipeImpl& impl) {\n          TP_VLOG(3) << \"Pipe \" << impl.id_ << \" done writing payload #\"\n                     << opIter->sequenceNumber << \".\" << payloadIdx;\n          opIter->numPayloadsBeingWritten--;\n          impl.writeOps_.advanceOperation(opIter);\n        }));\n    ++op.numPayloadsBeingWritten;\n  }\n}\n\nvoid PipeImpl::readDescriptorReplyOfMessage(WriteOpIter opIter) {\n  TP_DCHECK(context_->inLoop());\n\n  WriteOperation& op = *opIter;\n\n  TP_DCHECK(op.hasMissingTargetDevices);\n\n  auto nopHolderIn = std::make_shared<NopHolder<DescriptorReply>>();\n  TP_VLOG(3) << \"Pipe \" << id_\n             << \" is reading nop object (message descriptor reply #\"\n             << 
op.sequenceNumber << \")\";\n  descriptorReplyConnection_->read(\n      *nopHolderIn, callbackWrapper_([opIter, nopHolderIn](PipeImpl& impl) {\n        TP_VLOG(3) << \"Pipe \" << impl.id_\n                   << \" done reading nop object (message descriptor reply #\"\n                   << opIter->sequenceNumber << \")\";\n        opIter->doneReadingDescriptorReply = true;\n        if (!impl.error_) {\n          parseDescriptorReplyOfMessage(\n              *opIter, std::move(nopHolderIn->getObject()));\n        }\n        impl.writeOps_.advanceOperation(opIter);\n      }));\n}\n\nvoid PipeImpl::onReadWhileServerWaitingForBrochure(\n    const Brochure& nopBrochure) {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, SERVER_WAITING_FOR_BROCHURE);\n\n  auto nopHolderOut = std::make_shared<NopHolder<BrochureAnswer>>();\n  BrochureAnswer& nopBrochureAnswer = nopHolderOut->getObject();\n\n  auto transport = selectTransport(\n      context_->getOrderedTransports(),\n      nopBrochure.transportDomainDescriptors,\n      listener_->addresses());\n\n  if (transport.name != transport_) {\n    transport_ = transport.name;\n    nopBrochureAnswer.transportRegistrationIds[ConnectionId::DESCRIPTOR] =\n        registerTransport(ConnectionId::DESCRIPTOR);\n  }\n  nopBrochureAnswer.transportRegistrationIds[ConnectionId::DESCRIPTOR_REPLY] =\n      registerTransport(ConnectionId::DESCRIPTOR_REPLY);\n\n  nopBrochureAnswer.transport = transport.name;\n  nopBrochureAnswer.address = transport.address;\n  nopBrochureAnswer.transportDomainDescriptor = transport.domainDescriptor;\n\n  SelectedChannels selectedChannels = selectChannels(\n      context_->getOrderedChannels(), nopBrochure.channelDeviceDescriptors);\n  channelForDevicePair_ = std::move(selectedChannels.channelForDevicePair);\n  nopBrochureAnswer.channelForDevicePair = channelForDevicePair_;\n\n  for (auto& descriptorsIter : selectedChannels.descriptorsMap) {\n    const std::string& channelName = descriptorsIter.first;\n   
 nopBrochureAnswer.channelRegistrationIds[channelName] =\n        registerChannel(channelName);\n    std::unordered_map<Device, std::string>& deviceDescriptors =\n        descriptorsIter.second;\n    nopBrochureAnswer.channelDeviceDescriptors[channelName] =\n        std::move(deviceDescriptors);\n  }\n\n  TP_VLOG(3) << \"Pipe \" << id_ << \" is writing nop object (brochure answer)\";\n  descriptorConnection_->write(\n      *nopHolderOut, callbackWrapper_([nopHolderOut](PipeImpl& impl) {\n        TP_VLOG(3) << \"Pipe \" << impl.id_\n                   << \" done writing nop object (brochure answer)\";\n      }));\n\n  if (!pendingRegistrations()) {\n    state_ = ESTABLISHED;\n    readOps_.advanceAllOperations();\n    writeOps_.advanceAllOperations();\n  } else {\n    state_ = SERVER_WAITING_FOR_CONNECTIONS;\n  }\n}\n\nuint64_t PipeImpl::registerTransport(ConnectionId connId) {\n  TP_DCHECK(registrationIds_.count(connId) == 0);\n  TP_VLOG(3) << \"Pipe \" << id_ << \" is requesting connection (as replacement)\";\n  uint64_t token = listener_->registerConnectionRequest(\n      callbackWrapper_([connId](\n                           PipeImpl& impl,\n                           std::string transport,\n                           std::shared_ptr<transport::Connection> connection) {\n        TP_VLOG(3) << \"Pipe \" << impl.id_\n                   << \" done requesting connection (as replacement)\";\n        if (!impl.error_) {\n          impl.onAcceptWhileServerWaitingForConnection(\n              connId, std::move(transport), std::move(connection));\n        }\n      }));\n  registrationIds_[connId] = token;\n\n  return token;\n}\n\nstd::vector<uint64_t>& PipeImpl::registerChannel(\n    const std::string& channelName) {\n  const channel::Context& channelContext = *context_->getChannel(channelName);\n  const size_t numConnectionsNeeded = channelContext.numConnectionsNeeded();\n  auto& channelRegistrationIds = channelRegistrationIds_[channelName];\n  
channelRegistrationIds.resize(numConnectionsNeeded);\n  auto& channelReceivedConnections = channelReceivedConnections_[channelName];\n  channelReceivedConnections.resize(numConnectionsNeeded);\n  for (size_t connId = 0; connId < numConnectionsNeeded; ++connId) {\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is requesting connection \" << connId\n               << \"/\" << numConnectionsNeeded << \" (for channel \" << channelName\n               << \")\";\n    uint64_t token = listener_->registerConnectionRequest(callbackWrapper_(\n        [channelName, connId, numConnectionsNeeded](\n            PipeImpl& impl,\n            std::string transport,\n            std::shared_ptr<transport::Connection> connection) {\n          TP_VLOG(3) << \"Pipe \" << impl.id_ << \" done requesting connection \"\n                     << connId << \"/\" << numConnectionsNeeded\n                     << \" (for channel \" << channelName << \")\";\n          if (!impl.error_) {\n            impl.onAcceptWhileServerWaitingForChannel(\n                channelName,\n                connId,\n                std::move(transport),\n                std::move(connection));\n          }\n        }));\n    channelRegistrationIds[connId] = token;\n  }\n\n  return channelRegistrationIds;\n}\n\nvoid PipeImpl::onReadWhileClientWaitingForBrochureAnswer(\n    const BrochureAnswer& nopBrochureAnswer) {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, CLIENT_WAITING_FOR_BROCHURE_ANSWER);\n\n  const std::string& transport = nopBrochureAnswer.transport;\n  std::string address = nopBrochureAnswer.address;\n  std::shared_ptr<transport::Context> transportContext =\n      context_->getTransport(transport);\n  TP_DCHECK(transportContext->canCommunicateWithRemote(\n      nopBrochureAnswer.transportDomainDescriptor))\n      << \"The two endpoints disagree on whether transport \" << transport\n      << \" can be used to communicate\";\n\n  if (transport != transport_) {\n    TP_VLOG(3) << \"Pipe \" << id_\n     
          << \" is opening connection (descriptor, as replacement)\";\n    std::shared_ptr<transport::Connection> connection =\n        transportContext->connect(address);\n    connection->setId(id_ + \".d.tr_\" + transport);\n    const auto& transportRegistrationIter =\n        nopBrochureAnswer.transportRegistrationIds.find(\n            ConnectionId::DESCRIPTOR);\n    TP_DCHECK(\n        transportRegistrationIter !=\n        nopBrochureAnswer.transportRegistrationIds.end());\n    initConnection(*connection, transportRegistrationIter->second);\n\n    transport_ = transport;\n    descriptorConnection_ = std::move(connection);\n  }\n\n  {\n    TP_VLOG(3) << \"Pipe \" << id_ << \" is opening connection (descriptor_reply)\";\n    std::shared_ptr<transport::Connection> connection =\n        transportContext->connect(address);\n    connection->setId(id_ + \".r.tr_\" + transport);\n    const auto& transportRegistrationIter =\n        nopBrochureAnswer.transportRegistrationIds.find(\n            ConnectionId::DESCRIPTOR_REPLY);\n    TP_DCHECK(\n        transportRegistrationIter !=\n        nopBrochureAnswer.transportRegistrationIds.end());\n    initConnection(*connection, transportRegistrationIter->second);\n\n    descriptorReplyConnection_ = std::move(connection);\n  }\n\n  // Recompute the channel map based on this side's channels and priorities.\n  SelectedChannels selectedChannels = selectChannels(\n      context_->getOrderedChannels(),\n      nopBrochureAnswer.channelDeviceDescriptors);\n  channelForDevicePair_ = std::move(selectedChannels.channelForDevicePair);\n\n  // Verify that the locally and remotely computed channel maps are consistent.\n  TP_THROW_ASSERT_IF(\n      nopBrochureAnswer.channelForDevicePair.size() !=\n      channelForDevicePair_.size())\n      << \"Inconsistent channel selection\";\n  for (const auto& iter : channelForDevicePair_) {\n    Device localDevice;\n    Device remoteDevice;\n    std::tie(localDevice, remoteDevice) = iter.first;\n    
const std::string& channelName = iter.second;\n\n    const auto& answerIter = nopBrochureAnswer.channelForDevicePair.find(\n        {remoteDevice, localDevice});\n\n    TP_THROW_ASSERT_IF(\n        answerIter == nopBrochureAnswer.channelForDevicePair.end())\n        << \"Inconsistent channel selection\";\n    TP_THROW_ASSERT_IF(answerIter->second != channelName)\n        << \"Inconsistent channel selection\";\n  }\n\n  for (const auto& channelDeviceDescriptorsIter :\n       selectedChannels.descriptorsMap) {\n    const std::string& channelName = channelDeviceDescriptorsIter.first;\n    std::shared_ptr<channel::Context> channelContext =\n        context_->getChannel(channelName);\n\n    const std::vector<uint64_t>& registrationIds =\n        nopBrochureAnswer.channelRegistrationIds.at(channelName);\n    const size_t numConnectionsNeeded = channelContext->numConnectionsNeeded();\n    TP_DCHECK_EQ(numConnectionsNeeded, registrationIds.size());\n    std::vector<std::shared_ptr<transport::Connection>> connections(\n        numConnectionsNeeded);\n    for (size_t connId = 0; connId < numConnectionsNeeded; ++connId) {\n      TP_VLOG(3) << \"Pipe \" << id_ << \" is opening connection \" << connId << \"/\"\n                 << numConnectionsNeeded << \" (for channel \" << channelName\n                 << \")\";\n      std::shared_ptr<transport::Connection> connection =\n          transportContext->connect(address);\n      connection->setId(\n          id_ + \".ch_\" + channelName + \"_\" + std::to_string(connId));\n      initConnection(*connection, registrationIds[connId]);\n      connections[connId] = std::move(connection);\n    }\n\n    std::shared_ptr<channel::Channel> channel = channelContext->createChannel(\n        std::move(connections), channel::Endpoint::kConnect);\n    channel->setId(id_ + \".ch_\" + channelName);\n    channels_.emplace(channelName, std::move(channel));\n  }\n\n  state_ = ESTABLISHED;\n  readOps_.advanceAllOperations();\n  
writeOps_.advanceAllOperations();\n}\n\nvoid PipeImpl::initConnection(\n    transport::Connection& connection,\n    uint64_t token) {\n  auto nopHolderOut = std::make_shared<NopHolder<Packet>>();\n  Packet& nopPacketOut = nopHolderOut->getObject();\n  nopPacketOut.Become(nopPacketOut.index_of<RequestedConnection>());\n  RequestedConnection& nopRequestedConnection =\n      *nopPacketOut.get<RequestedConnection>();\n  nopRequestedConnection.registrationId = token;\n  TP_VLOG(3) << \"Pipe \" << id_\n             << \" is writing nop object (requested connection)\";\n  connection.write(\n      *nopHolderOut, callbackWrapper_([nopHolderOut](PipeImpl& impl) {\n        TP_VLOG(3) << \"Pipe \" << impl.id_\n                   << \" done writing nop object (requested connection)\";\n      }));\n}\n\nvoid PipeImpl::onAcceptWhileServerWaitingForConnection(\n    ConnectionId connId,\n    std::string receivedTransport,\n    std::shared_ptr<transport::Connection> receivedConnection) {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, SERVER_WAITING_FOR_CONNECTIONS);\n  const auto& registrationIdIter = registrationIds_.find(connId);\n  TP_DCHECK(registrationIdIter != registrationIds_.end());\n  size_t token = registrationIdIter->second;\n  listener_->unregisterConnectionRequest(token);\n  registrationIds_.erase(registrationIdIter);\n  TP_DCHECK_EQ(transport_, receivedTransport);\n\n  switch (connId) {\n    case ConnectionId::DESCRIPTOR:\n      receivedConnection->setId(id_ + \".d.tr_\" + receivedTransport);\n      descriptorConnection_ = std::move(receivedConnection);\n      break;\n    case ConnectionId::DESCRIPTOR_REPLY:\n      receivedConnection->setId(id_ + \".r.tr_\" + receivedTransport);\n      descriptorReplyConnection_ = std::move(receivedConnection);\n      break;\n    default:\n      TP_THROW_ASSERT() << \"Unrecognized connection identifier\";\n  }\n\n  if (!pendingRegistrations()) {\n    state_ = ESTABLISHED;\n    readOps_.advanceAllOperations();\n    
writeOps_.advanceAllOperations();\n  }\n}\n\nvoid PipeImpl::onAcceptWhileServerWaitingForChannel(\n    std::string channelName,\n    size_t connId,\n    std::string receivedTransport,\n    std::shared_ptr<transport::Connection> receivedConnection) {\n  TP_DCHECK(context_->inLoop());\n  TP_DCHECK_EQ(state_, SERVER_WAITING_FOR_CONNECTIONS);\n  TP_DCHECK_EQ(transport_, receivedTransport);\n  auto channelRegistrationIdsIter = channelRegistrationIds_.find(channelName);\n  TP_DCHECK(channelRegistrationIdsIter != channelRegistrationIds_.end());\n  listener_->unregisterConnectionRequest(\n      channelRegistrationIdsIter->second[connId]);\n  receivedConnection->setId(\n      id_ + \".ch_\" + channelName + \"_\" + std::to_string(connId));\n\n  channelReceivedConnections_[channelName][connId] =\n      std::move(receivedConnection);\n  // TODO: If we can guarantee the order in which the accept() calls happen,\n  // this check can be replaced with `if (connId == numConnectionsNeeded -\n  // 1)`.\n  for (const auto& conn : channelReceivedConnections_[channelName]) {\n    if (conn == nullptr) {\n      return;\n    }\n  }\n\n  std::shared_ptr<channel::Context> channelContext =\n      context_->getChannel(channelName);\n\n  std::shared_ptr<channel::Channel> channel = channelContext->createChannel(\n      std::move(channelReceivedConnections_[channelName]),\n      channel::Endpoint::kListen);\n  channel->setId(id_ + \".ch_\" + channelName);\n\n  channelRegistrationIds_.erase(channelRegistrationIdsIter);\n  channelReceivedConnections_.erase(channelName);\n\n  TP_DCHECK(channels_.find(channelName) == channels_.end());\n  channels_.emplace(channelName, std::move(channel));\n\n  if (!pendingRegistrations()) {\n    state_ = ESTABLISHED;\n    readOps_.advanceAllOperations();\n    writeOps_.advanceAllOperations();\n  }\n}\n\nbool PipeImpl::pendingRegistrations() {\n  if (!registrationIds_.empty()) {\n    return true;\n  }\n\n  if (!channelRegistrationIds_.empty()) {\n    return true;\n  
}\n\n  return false;\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/core/pipe_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <deque>\n#include <map>\n#include <memory>\n#include <string>\n#include <tuple>\n#include <unordered_map>\n#include <vector>\n\n#include <tensorpipe/channel/channel.h>\n#include <tensorpipe/common/buffer.h>\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/state_machine.h>\n#include <tensorpipe/core/context_impl.h>\n#include <tensorpipe/core/message.h>\n#include <tensorpipe/core/nop_types.h>\n#include <tensorpipe/core/pipe.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\n\nclass ContextImpl;\nclass ListenerImpl;\n\nstruct ReadOperation {\n  enum State {\n    UNINITIALIZED,\n    READING_DESCRIPTOR,\n    ASKING_FOR_ALLOCATION,\n    ASKING_FOR_ALLOCATION_FIRST_IN_LINE,\n    READING_PAYLOADS_AND_RECEIVING_TENSORS,\n    FINISHED\n  };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingDescriptor{false};\n  bool doneGettingAllocation{false};\n  uint64_t numPayloadsBeingRead{0};\n  uint64_t numTensorsBeingReceived{0};\n\n  // Callbacks.\n  Pipe::read_descriptor_callback_fn readDescriptorCallback;\n  Pipe::read_callback_fn readCallback;\n\n  // Arguments at creation\n  bool hasMissingTargetDevices{false};\n\n  Descriptor descriptor;\n  // Buffers allocated by the user.\n  Allocation allocation;\n};\n\nstruct WriteOperation {\n  enum State {\n    UNINITIALIZED,\n    WRITING_PAYLOADS_AND_READING_TARGET_DEVICES,\n    WRITING_PAYLOADS_AND_SENDING_TENSORS,\n    FINISHED\n  };\n\n  // Fields used by the state machine\n  uint64_t sequenceNumber{0};\n  
State state{UNINITIALIZED};\n\n  // Progress flags\n  bool doneReadingDescriptorReply{false};\n  uint64_t numPayloadsBeingWritten{0};\n  uint64_t numTensorsBeingSent{0};\n\n  // Callbacks.\n  Pipe::write_callback_fn writeCallback;\n\n  // Arguments at creation\n  bool hasMissingTargetDevices{false};\n\n  Message message;\n\n  struct Tensor {\n    Device sourceDevice;\n    optional<Device> targetDevice;\n  };\n  std::vector<Tensor> tensors;\n};\n\nclass PipeImpl final : public std::enable_shared_from_this<PipeImpl> {\n public:\n  PipeImpl(\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::string remoteName,\n      const std::string& url);\n\n  PipeImpl(\n      std::shared_ptr<ContextImpl> context,\n      std::shared_ptr<ListenerImpl> listener,\n      std::string id,\n      std::string remoteName,\n      std::string transport,\n      std::shared_ptr<transport::Connection> connection);\n\n  // Called by the pipe's constructor.\n  void init();\n\n  using read_descriptor_callback_fn = Pipe::read_descriptor_callback_fn;\n  using read_callback_fn = Pipe::read_callback_fn;\n  using write_callback_fn = Pipe::write_callback_fn;\n\n  void readDescriptor(read_descriptor_callback_fn fn);\n  void read(Allocation allocation, read_callback_fn fn);\n  void write(Message message, write_callback_fn fn);\n\n  const std::string& getRemoteName();\n\n  void close();\n\n private:\n  void initFromLoop();\n\n  void readDescriptorFromLoop(read_descriptor_callback_fn fn);\n\n  void readFromLoop(Allocation allocation, read_callback_fn fn);\n\n  void writeFromLoop(Message message, write_callback_fn fn);\n\n  void closeFromLoop();\n\n  enum State {\n    INITIALIZING,\n    CLIENT_ABOUT_TO_SEND_HELLO_AND_BROCHURE,\n    SERVER_WAITING_FOR_BROCHURE,\n    CLIENT_WAITING_FOR_BROCHURE_ANSWER,\n    SERVER_WAITING_FOR_CONNECTIONS,\n    ESTABLISHED\n  };\n\n  State state_{INITIALIZING};\n\n  std::shared_ptr<ContextImpl> context_;\n  std::shared_ptr<ListenerImpl> 
listener_;\n\n  // An identifier for the pipe, composed of the identifier for the context or\n  // listener, combined with an increasing sequence number. It will only be used\n  // for logging and debugging purposes.\n  std::string id_;\n\n  // The name the user has given to the connect method of the local context (for\n  // outgoing pipes) or to the constructor of the context on the remote end (for\n  // incoming pipes).\n  std::string remoteName_;\n\n  std::string transport_;\n  enum ConnectionId { DESCRIPTOR, DESCRIPTOR_REPLY };\n  std::shared_ptr<transport::Connection> descriptorConnection_;\n  std::shared_ptr<transport::Connection> descriptorReplyConnection_;\n\n  std::unordered_map<std::string, std::shared_ptr<channel::Channel>> channels_;\n  std::unordered_map<std::pair<Device, Device>, std::string>\n      channelForDevicePair_;\n\n  // The server will set this up when it tell the client to switch to a\n  // different connection or to open some channels.\n  std::unordered_map<uint64_t, uint64_t> registrationIds_;\n\n  std::unordered_map<std::string, std::vector<uint64_t>>\n      channelRegistrationIds_;\n\n  std::unordered_map<\n      std::string,\n      std::vector<std::shared_ptr<transport::Connection>>>\n      channelReceivedConnections_;\n\n  OpsStateMachine<PipeImpl, ReadOperation> readOps_{\n      *this,\n      &PipeImpl::advanceReadOperation};\n  using ReadOpIter = decltype(readOps_)::Iter;\n  OpsStateMachine<PipeImpl, WriteOperation> writeOps_{\n      *this,\n      &PipeImpl::advanceWriteOperation};\n  using WriteOpIter = decltype(writeOps_)::Iter;\n\n  // A sequence number for the calls to read and write.\n  uint64_t nextMessageBeingRead_{0};\n  uint64_t nextMessageBeingWritten_{0};\n\n  // A sequence number for the invocations of the callbacks of read and write.\n  uint64_t nextReadDescriptorCallbackToCall_{0};\n  uint64_t nextReadCallbackToCall_{0};\n  uint64_t nextWriteCallbackToCall_{0};\n\n  // When reading, we first read the descriptor, then 
signal this to the user,\n  // and only once the user has allocated the memory we read the payloads. These\n  // members store where we are in this loop, i.e., whether the next buffer we\n  // will read from the connection will be a descriptor or a payload, and the\n  // sequence number of which message that will be for.\n  enum ConnectionState { AWAITING_DESCRIPTOR, AWAITING_PAYLOADS };\n  ConnectionState connectionState_{AWAITING_DESCRIPTOR};\n  uint64_t messageBeingReadFromConnection_{0};\n\n  // When reading, each message will be presented to the user in order for some\n  // memory to be allocated for its payloads and tensors (this happens by\n  // calling the readDescriptor callback and waiting for a read call). Under\n  // normal operation there will be either 0 or 1 messages whose allocation is\n  // pending, but there could be more after an error occurs, as we'll flush all\n  // callbacks. We need to remember which is the first such operation for which\n  // we're waiting for allocation in order to match calls to read to the right\n  // message and for sanity checks. 
We do so by using a special state in the\n  // state machine to identify the next operation that will receive a read call,\n  // and store its iterator in this field.\n  optional<ReadOpIter> nextMessageGettingAllocation_;\n\n  Error error_{Error::kSuccess};\n\n  //\n  // Helpers to prepare callbacks from transports and listener\n  //\n\n  CallbackWrapper<PipeImpl> callbackWrapper_{*this, *this->context_};\n\n  //\n  // Error handling\n  //\n\n  void setError(Error error);\n\n  void handleError();\n\n  //\n  // State machines\n  //\n\n  // Transitions for the pipe's initial handshake.\n  // On the client side:\n  void onReadWhileClientWaitingForBrochureAnswer(\n      const BrochureAnswer& nopBrochureAnswer);\n  // On the server side:\n  void onReadWhileServerWaitingForBrochure(const Brochure& nopBrochure);\n  void onAcceptWhileServerWaitingForConnection(\n      ConnectionId connId,\n      std::string receivedTransport,\n      std::shared_ptr<transport::Connection> receivedConnection);\n  void onAcceptWhileServerWaitingForChannel(\n      std::string channelName,\n      size_t connId,\n      std::string receivedTransport,\n      std::shared_ptr<transport::Connection> receivedConnection);\n\n  // State machines for read and write ops.\n  void advanceReadOperation(\n      ReadOpIter opIter,\n      ReadOperation::State prevOpState);\n  void advanceWriteOperation(\n      WriteOpIter opIter,\n      WriteOperation::State prevOpState);\n\n  // Actions (i.e., methods that begin a state transition).\n  // For read operations:\n  void readDescriptorOfMessage(ReadOpIter opIter);\n  void callReadDescriptorCallback(ReadOpIter opIter);\n  void expectReadCall(ReadOpIter opIter);\n  void readPayloadsOfMessage(ReadOpIter opIter);\n  void receiveTensorsOfMessage(ReadOpIter opIter);\n  void writeDescriptorReplyOfMessage(ReadOpIter opIter);\n  void callReadCallback(ReadOpIter opIter);\n  // For write operations:\n  void writeDescriptorOfMessage(WriteOpIter opIter);\n  void 
writePayloadsOfMessage(WriteOpIter opIter);\n  void readDescriptorReplyOfMessage(WriteOpIter opIter);\n  void sendTensorsOfMessage(WriteOpIter opIter);\n  void callWriteCallback(WriteOpIter opIter);\n\n  //\n  // Everything else\n  //\n\n  void initConnection(transport::Connection& connection, uint64_t token);\n  uint64_t registerTransport(ConnectionId connId);\n  std::vector<uint64_t>& registerChannel(const std::string& channelName);\n\n  bool pendingRegistrations();\n\n  template <typename T>\n  friend class CallbackWrapper;\n\n  // Contexts and listeners do sometimes need to call directly into initFromLoop\n  // and closeFromLoop, in order to make sure that some of their operations can\n  // happen \"atomically\" on the connection, without possibly other operations\n  // occurring in between (e.g., an error).\n  friend ContextImpl;\n  friend ListenerImpl;\n};\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/misc/CMakeLists.txt",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\nadd_executable(dump_state_machine dump_state_machine.cc)\nfind_package(Clang REQUIRED)\ntarget_include_directories(dump_state_machine PRIVATE ${CLANG_INCLUDE_DIRS})\ntarget_link_libraries(dump_state_machine PRIVATE\n  clangTooling\n  clangBasic\n  clangASTMatchers)\n"
  },
  {
    "path": "tensorpipe/misc/dump_state_machine.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <iostream>\n#include <regex>\n#include <unordered_set>\n\n#include <clang/ASTMatchers/ASTMatchFinder.h>\n#include <clang/ASTMatchers/ASTMatchers.h>\n#include <clang/Frontend/FrontendActions.h>\n#include <clang/Tooling/CommonOptionsParser.h>\n#include <clang/Tooling/Tooling.h>\n#include <llvm/Support/CommandLine.h>\n\nusing namespace clang::ast_matchers;\nusing namespace clang::tooling;\nusing namespace llvm;\n\nnamespace {\n\nstd::string exprToString(const clang::Expr& e) {\n  std::string statement;\n  raw_string_ostream stream(statement);\n  e.printPretty(stream, nullptr, clang::PrintingPolicy(clang::LangOptions()));\n  stream.flush();\n\n  return statement;\n}\n\nstd::string cleanUp(const std::string& s) {\n  std::string res = s;\n  res = std::regex_replace(res, std::regex(\"(struct|class) [a-zA-Z_]+::\"), \"\");\n  res = std::regex_replace(res, std::regex(\"this->\"), \"\");\n  return res;\n}\n\nstd::string escape(const std::string& s) {\n  std::string res = s;\n  res = std::regex_replace(res, std::regex(\"\\\\{\"), \"\\\\{\");\n  res = std::regex_replace(res, std::regex(\"\\\\}\"), \"\\\\}\");\n  res = std::regex_replace(res, std::regex(\">\"), \"\\\\>\");\n  res = std::regex_replace(res, std::regex(\"<\"), \"\\\\<\");\n  res = std::regex_replace(res, std::regex(\"\\\\|\"), \"\\\\|\");\n  return res;\n}\n\nclass MethodPrinter : public MatchFinder::MatchCallback {\n  std::unordered_set<std::string> nodes_;\n\n  void addNode(const std::string& label) {\n    std::cout << label << \" [label=<<b>\" << label\n              << \"</b>>,group=states,fontstyle=\\\"bold\\\"];\" << std::endl;\n    nodes_.insert(label);\n  }\n\n public:\n  void run(const MatchFinder::MatchResult& result) override {\n    static int edgeCount 
= 0;\n\n    const clang::CallExpr& e = *result.Nodes.getNodeAs<clang::CallExpr>(\"x\");\n    std::string edgeId = \"edge\" + std::to_string(edgeCount++);\n    std::string fromId = cleanUp(exprToString(*e.getArg(1)));\n    std::string toId = cleanUp(exprToString(*e.getArg(2)));\n\n    if (nodes_.count(fromId) == 0) {\n      addNode(fromId);\n    }\n\n    if (nodes_.count(toId) == 0) {\n      addNode(toId);\n    }\n\n    std::string edgeColor = \"orange3\";\n    int edgeWeight = 100;\n    std::string cond = cleanUp(exprToString(*e.getArg(3)));\n    if (std::regex_search(cond, std::regex(\"^error_\"))) {\n      edgeColor = \"red3\";\n      edgeWeight = 0;\n    }\n    if (std::regex_search(cond, std::regex(\"^!error_\"))) {\n      edgeColor = \"forestgreen\";\n    }\n    cond = std::regex_replace(cond, std::regex(\" \\\\&\\\\&\"), \"\\\\n\");\n    cond = escape(cond);\n\n    std::string actions = cleanUp(exprToString(*e.getArg(4)));\n    actions = std::regex_replace(actions, std::regex(\"(\\\\{|\\\\})\"), \"\");\n    actions = std::regex_replace(actions, std::regex(\", \"), \"\\\\n\");\n    actions = std::regex_replace(actions, std::regex(\"\\\\&\"), \"\");\n\n    std::cout << edgeId << \" [label=\\\"{\" << cond << \"|\" << actions\n              << \"}\\\",shape=record,style=\\\"rounded,dashed\\\",color=\\\"\"\n              << edgeColor << \"\\\"];\" << std::endl;\n\n    std::cout << fromId << \" -> \" << edgeId << \"[dir=\\\"none\\\",color=\\\"\"\n              << edgeColor << \"\\\",style=\\\"dashed\\\",weight=\" << edgeWeight\n              << \"];\" << std::endl;\n\n    std::cout << edgeId << \" -> \" << toId << \"[color=\\\"\" << edgeColor\n              << \"\\\",style=\\\"dashed\\\",weight=\" << edgeWeight << \"];\"\n              << std::endl;\n  }\n};\n\n} // namespace\n\nint main(int argc, const char* argv[]) {\n  cl::OptionCategory category(\"dump_state_machine\");\n  cl::opt<std::string> methodName(\n      \"method\",\n      cl::Required,\n      
cl::cat(category),\n      cl::desc(\n          \"Name of the method implementing the state machine's transitions.\"),\n      cl::value_desc(\"method_name\"));\n\n  CommonOptionsParser optionsParser(argc, argv, category, cl::Required);\n  ClangTool tool(\n      optionsParser.getCompilations(), optionsParser.getSourcePathList());\n  auto methodMatcher = callExpr(\n                           callee(cxxMethodDecl(hasName(\"attemptTransition\"))),\n                           hasAncestor(cxxMethodDecl(hasName(methodName))))\n                           .bind(\"x\");\n  MethodPrinter printer;\n  MatchFinder finder;\n  finder.addMatcher(methodMatcher, &printer);\n  std::cout << \"digraph {\" << std::endl\n            << \"graph [rankdir=TB]\" << std::endl\n            << \"node [shape=box]\" << std::endl;\n  int res = tool.run(newFrontendActionFactory(&finder).get());\n  std::cout << \"}\" << std::endl;\n  return res;\n}\n"
  },
  {
    "path": "tensorpipe/python/CMakeLists.txt",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\nif(NOT (COMMAND pybind11_add_module))\n  add_subdirectory(\n    ${PROJECT_SOURCE_DIR}/third_party/pybind11\n    ${PROJECT_BINARY_DIR}/third_party/pybind11\n    EXCLUDE_FROM_ALL)\nendif()\n\nset(PYBIND11_CPP_STANDARD -std=c++17)\npybind11_add_module(pytensorpipe tensorpipe.cc)\ntarget_link_libraries(pytensorpipe PRIVATE tensorpipe)\n"
  },
  {
    "path": "tensorpipe/python/tensorpipe.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <memory>\n#include <string>\n#include <vector>\n\n#include <pybind11/functional.h>\n#include <pybind11/pybind11.h>\n#include <pybind11/stl.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/tensorpipe.h>\n\nnamespace py = pybind11;\n\nnamespace {\n\nusing tensorpipe::optional;\n\n// RAII wrapper to reliably release every buffer we get.\nclass BufferWrapper {\n public:\n  BufferWrapper(const py::buffer& buffer, int flags) {\n    if (PyObject_GetBuffer(buffer.ptr(), &buffer_, flags) != 0) {\n      throw py::error_already_set();\n    }\n  }\n\n  BufferWrapper(const BufferWrapper& other) = delete;\n\n  BufferWrapper(BufferWrapper&& other) = delete;\n\n  BufferWrapper& operator=(const BufferWrapper& other) = delete;\n\n  BufferWrapper& operator=(BufferWrapper&& other) = delete;\n\n  ~BufferWrapper() {\n    PyBuffer_Release(&buffer_);\n  }\n\n  void* ptr() const {\n    return buffer_.buf;\n  }\n\n  size_t length() const {\n    return buffer_.len;\n  }\n\n  py::buffer_info getBuffer() {\n    return py::buffer_info(\n        buffer_.buf,\n        1,\n        py::format_descriptor<unsigned char>::format(),\n        1,\n        {static_cast<size_t>(buffer_.len)},\n        {1});\n  }\n\n private:\n  Py_buffer buffer_;\n};\n\nclass OutgoingPayload {\n public:\n  BufferWrapper buffer;\n  BufferWrapper metadata;\n\n  OutgoingPayload(const py::buffer& buffer, const py::buffer& metadata)\n      : buffer(buffer, PyBUF_SIMPLE), metadata(metadata, PyBUF_SIMPLE) {}\n};\n\nclass OutgoingTensor {\n public:\n  BufferWrapper buffer;\n  BufferWrapper metadata;\n\n  OutgoingTensor(const py::buffer& buffer, const py::buffer& metadata)\n      : buffer(buffer, PyBUF_SIMPLE), 
metadata(metadata, PyBUF_SIMPLE) {}\n};\n\nclass OutgoingMessage {\n public:\n  BufferWrapper metadata;\n  std::vector<std::shared_ptr<OutgoingPayload>> payloads;\n  std::vector<std::shared_ptr<OutgoingTensor>> tensors;\n\n  OutgoingMessage(\n      const py::buffer& metadata,\n      const std::vector<std::shared_ptr<OutgoingPayload>>& payloads,\n      const std::vector<std::shared_ptr<OutgoingTensor>>& tensors)\n      : metadata(metadata, PyBUF_SIMPLE),\n        payloads(payloads),\n        tensors(tensors) {}\n};\n\ntensorpipe::Message prepareToWrite(std::shared_ptr<OutgoingMessage> pyMessage) {\n  tensorpipe::Message tpMessage{\n      {reinterpret_cast<char*>(pyMessage->metadata.ptr()),\n       pyMessage->metadata.length()}};\n  tpMessage.payloads.reserve(pyMessage->payloads.size());\n  for (const auto& pyPayload : pyMessage->payloads) {\n    tensorpipe::Message::Payload tpPayload{\n        .data = pyPayload->buffer.ptr(),\n        .length = pyPayload->buffer.length(),\n        .metadata =\n            {reinterpret_cast<char*>(pyPayload->metadata.ptr()),\n             pyPayload->metadata.length()},\n    };\n    tpMessage.payloads.push_back(std::move(tpPayload));\n  }\n  tpMessage.tensors.reserve(pyMessage->tensors.size());\n  for (const auto& pyTensor : pyMessage->tensors) {\n    tensorpipe::Message::Tensor tpTensor{\n        .buffer = tensorpipe::CpuBuffer{.ptr = pyTensor->buffer.ptr()},\n        .length = pyTensor->buffer.length(),\n        .metadata =\n            {reinterpret_cast<char*>(pyTensor->metadata.ptr()),\n             pyTensor->metadata.length()},\n    };\n    tpMessage.tensors.push_back(std::move(tpTensor));\n  }\n  return tpMessage;\n}\n\nclass IncomingPayload {\n public:\n  size_t length;\n  optional<BufferWrapper> buffer;\n  py::bytes metadata;\n\n  IncomingPayload(size_t length, py::bytes metadata)\n      : length(length), metadata(metadata) {}\n\n  void set_buffer(const py::buffer& pyBuffer) {\n    TP_THROW_ASSERT_IF(buffer.has_value()) << 
\"Buffer already set\";\n    buffer.emplace(pyBuffer, PyBUF_SIMPLE | PyBUF_WRITABLE);\n    if (buffer->length() != length) {\n      buffer.reset();\n      TP_THROW_ASSERT() << \"Bad length\";\n    }\n  }\n};\n\nclass IncomingTensor {\n public:\n  size_t length;\n  optional<BufferWrapper> buffer;\n  py::bytes metadata;\n\n  IncomingTensor(size_t length, py::bytes metadata)\n      : length(length), metadata(metadata) {}\n\n  void set_buffer(const py::buffer& pyBuffer) {\n    TP_THROW_ASSERT_IF(buffer.has_value()) << \"Buffer already set\";\n    buffer.emplace(pyBuffer, PyBUF_SIMPLE | PyBUF_WRITABLE);\n    if (buffer->length() != length) {\n      buffer.reset();\n      TP_THROW_ASSERT() << \"Bad length\";\n    }\n  }\n};\n\nclass IncomingMessage {\n public:\n  py::bytes metadata;\n  std::vector<std::shared_ptr<IncomingPayload>> payloads;\n  std::vector<std::shared_ptr<IncomingTensor>> tensors;\n\n  IncomingMessage(\n      py::bytes metadata,\n      std::vector<std::shared_ptr<IncomingPayload>> payloads,\n      std::vector<std::shared_ptr<IncomingTensor>> tensors)\n      : metadata(metadata), payloads(payloads), tensors(tensors) {}\n};\n\nstd::shared_ptr<IncomingMessage> prepareToAllocate(\n    const tensorpipe::Descriptor& tpDescriptor) {\n  std::vector<std::shared_ptr<IncomingPayload>> pyPayloads;\n  pyPayloads.reserve(tpDescriptor.payloads.size());\n  for (const auto& tpPayload : tpDescriptor.payloads) {\n    pyPayloads.push_back(std::make_shared<IncomingPayload>(\n        tpPayload.length, tpPayload.metadata));\n  }\n  std::vector<std::shared_ptr<IncomingTensor>> pyTensors;\n  pyTensors.reserve(tpDescriptor.tensors.size());\n  for (const auto& tpTensor : tpDescriptor.tensors) {\n    pyTensors.push_back(\n        std::make_shared<IncomingTensor>(tpTensor.length, tpTensor.metadata));\n  }\n  auto pyMessage = std::make_shared<IncomingMessage>(\n      tpDescriptor.metadata, std::move(pyPayloads), std::move(pyTensors));\n  return pyMessage;\n}\n\ntensorpipe::Allocation 
prepareToRead(\n    std::shared_ptr<IncomingMessage> pyMessage) {\n  tensorpipe::Allocation tpAllocation;\n  tpAllocation.payloads.reserve(pyMessage->payloads.size());\n  for (const auto& pyPayload : pyMessage->payloads) {\n    TP_THROW_ASSERT_IF(!pyPayload->buffer.has_value()) << \"No buffer\";\n    tensorpipe::Allocation::Payload tpPayload{\n        .data = pyPayload->buffer.value().ptr(),\n    };\n    tpAllocation.payloads.push_back(std::move(tpPayload));\n  }\n  tpAllocation.tensors.reserve(pyMessage->tensors.size());\n  for (const auto& pyTensor : pyMessage->tensors) {\n    TP_THROW_ASSERT_IF(!pyTensor->buffer.has_value()) << \"No buffer\";\n    tensorpipe::Allocation::Tensor tpTensor{\n        .buffer = tensorpipe::CpuBuffer{.ptr = pyTensor->buffer.value().ptr()},\n    };\n    tpAllocation.tensors.push_back(std::move(tpTensor));\n  }\n  return tpAllocation;\n}\n\ntemplate <typename T>\nusing shared_ptr_class_ = py::class_<T, std::shared_ptr<T>>;\n\n} // namespace\n\nPYBIND11_MODULE(pytensorpipe, module) {\n  py::print(\n      \"These bindings are EXPERIMENTAL, intended to give a PREVIEW of the API, \"\n      \"and, as such, may CHANGE AT ANY TIME.\");\n\n  shared_ptr_class_<tensorpipe::Context> context(module, \"Context\");\n  shared_ptr_class_<tensorpipe::Listener> listener(module, \"Listener\");\n  shared_ptr_class_<tensorpipe::Pipe> pipe(module, \"Pipe\");\n\n  shared_ptr_class_<OutgoingPayload> outgoingPayload(module, \"OutgoingPayload\");\n  outgoingPayload.def(\n      py::init<py::buffer, py::buffer>(),\n      py::arg(\"buffer\"),\n      py::arg(\"metadata\"));\n  shared_ptr_class_<OutgoingTensor> outgoingTensor(module, \"OutgoingTensor\");\n  outgoingTensor.def(\n      py::init<py::buffer, py::buffer>(),\n      py::arg(\"buffer\"),\n      py::arg(\"metadata\"));\n  shared_ptr_class_<OutgoingMessage> outgoingMessage(module, \"OutgoingMessage\");\n  outgoingMessage.def(\n      py::init<\n          py::buffer,\n          const 
std::vector<std::shared_ptr<OutgoingPayload>>,\n          const std::vector<std::shared_ptr<OutgoingTensor>>>(),\n      py::arg(\"metadata\"),\n      py::arg(\"payloads\"),\n      py::arg(\"tensors\"));\n\n  shared_ptr_class_<IncomingPayload> incomingPayload(\n      module, \"IncomingPayload\", py::buffer_protocol());\n  incomingPayload.def_readonly(\"length\", &IncomingPayload::length);\n  incomingPayload.def_readonly(\"metadata\", &IncomingPayload::metadata);\n  incomingPayload.def_property(\n      \"buffer\",\n      [](IncomingPayload& pyPayload) -> py::buffer_info {\n        TP_THROW_ASSERT_IF(!pyPayload.buffer.has_value()) << \"No buffer\";\n        return pyPayload.buffer->getBuffer();\n      },\n      &IncomingPayload::set_buffer);\n  shared_ptr_class_<IncomingTensor> incomingTensor(\n      module, \"IncomingTensor\", py::buffer_protocol());\n  incomingTensor.def_readonly(\"length\", &IncomingTensor::length);\n  incomingTensor.def_readonly(\"metadata\", &IncomingTensor::metadata);\n  incomingTensor.def_property(\n      \"buffer\",\n      [](IncomingTensor& pyTensor) -> py::buffer_info {\n        TP_THROW_ASSERT_IF(!pyTensor.buffer.has_value()) << \"No buffer\";\n        return pyTensor.buffer->getBuffer();\n      },\n      &IncomingTensor::set_buffer);\n  shared_ptr_class_<IncomingMessage> incomingMessage(\n      module, \"IncomingMessage\", py::buffer_protocol());\n  incomingMessage.def_readonly(\"metadata\", &IncomingMessage::metadata);\n  incomingMessage.def_readonly(\"payloads\", &IncomingMessage::payloads);\n  incomingMessage.def_readonly(\"tensors\", &IncomingMessage::tensors);\n\n  // Creators.\n\n  context.def(py::init<>());\n  context.def(\n      \"listen\",\n      [](std::shared_ptr<tensorpipe::Context> context,\n         const std::vector<std::string>& urls) {\n        return context->listen(urls);\n      },\n      py::arg(\"urls\"));\n  context.def(\n      \"connect\",\n      [](std::shared_ptr<tensorpipe::Context> context, const std::string& 
url) {\n        return context->connect(url);\n      },\n      py::arg(\"url\"));\n\n  context.def(\n      \"join\",\n      &tensorpipe::Context::join,\n      py::call_guard<py::gil_scoped_release>());\n\n  // Callback registration.\n\n  listener.def(\n      \"listen\",\n      [](std::shared_ptr<tensorpipe::Listener> listener, py::object callback) {\n        listener->accept([callback{std::move(callback)}](\n                             const tensorpipe::Error& error,\n                             std::shared_ptr<tensorpipe::Pipe> pipe) mutable {\n          if (error) {\n            TP_LOG_ERROR() << error.what();\n            return;\n          }\n          TP_THROW_ASSERT_IF(!pipe) << \"No pipe\";\n          py::gil_scoped_acquire acquire;\n          try {\n            callback(std::move(pipe));\n          } catch (const py::error_already_set& err) {\n            TP_LOG_ERROR() << \"Callback raised exception: \" << err.what();\n          }\n          // Leaving the scope will decrease the refcount of callback which\n          // may cause it to get destructed, which might segfault since we\n          // won't be holding the GIL anymore. 
So we reset callback now,\n          // while we're still holding the GIL.\n          callback = py::object();\n        });\n      });\n\n  pipe.def(\n      \"read_descriptor\",\n      [](std::shared_ptr<tensorpipe::Pipe> pipe, py::object callback) {\n        pipe->readDescriptor([callback{std::move(callback)}](\n                                 const tensorpipe::Error& error,\n                                 tensorpipe::Descriptor descriptor) mutable {\n          if (error) {\n            TP_LOG_ERROR() << error.what();\n            return;\n          }\n          py::gil_scoped_acquire acquire;\n          try {\n            callback(prepareToAllocate(std::move(descriptor)));\n          } catch (const py::error_already_set& err) {\n            TP_LOG_ERROR() << \"Callback raised exception: \" << err.what();\n          }\n          // Leaving the scope will decrease the refcount of callback which\n          // may cause it to get destructed, which might segfault since we\n          // won't be holding the GIL anymore. 
So we reset callback now,\n          // while we're still holding the GIL.\n          callback = py::object();\n        });\n      });\n\n  pipe.def(\n      \"read\",\n      [](std::shared_ptr<tensorpipe::Pipe> pipe,\n         std::shared_ptr<IncomingMessage> pyMessage,\n         py::object callback) {\n        tensorpipe::Allocation tpAllocation =\n            prepareToRead(std::move(pyMessage));\n        pipe->read(\n            std::move(tpAllocation),\n            [callback{std::move(callback)}](\n                const tensorpipe::Error& error) mutable {\n              if (error) {\n                TP_LOG_ERROR() << error.what();\n                return;\n              }\n              py::gil_scoped_acquire acquire;\n              try {\n                callback();\n              } catch (const py::error_already_set& err) {\n                TP_LOG_ERROR() << \"Callback raised exception: \" << err.what();\n              }\n              // Leaving the scope will decrease the refcount of callback which\n              // may cause it to get destructed, which might segfault since we\n              // won't be holding the GIL anymore. 
So we reset callback now,\n              // while we're still holding the GIL.\n              callback = py::object();\n            });\n      });\n\n  pipe.def(\n      \"write\",\n      [](std::shared_ptr<tensorpipe::Pipe> pipe,\n         std::shared_ptr<OutgoingMessage> pyMessage,\n         py::object callback) {\n        tensorpipe::Message tpMessage = prepareToWrite(std::move(pyMessage));\n        pipe->write(\n            std::move(tpMessage),\n            [callback{std::move(callback)}](\n                const tensorpipe::Error& error) mutable {\n              if (error) {\n                TP_LOG_ERROR() << error.what();\n                return;\n              }\n              py::gil_scoped_acquire acquire;\n              try {\n                callback();\n              } catch (const py::error_already_set& err) {\n                TP_LOG_ERROR() << \"Callback raised exception: \" << err.what();\n              }\n              // Leaving the scope will decrease the refcount of callback which\n              // may cause it to get destructed, which might segfault since we\n              // won't be holding the GIL anymore. 
So we reset callback now,\n              // while we're still holding the GIL.\n              callback = py::object();\n            });\n      });\n\n  // Transports and channels\n\n  shared_ptr_class_<tensorpipe::transport::Context> abstractTransport(\n      module, \"AbstractTransport\");\n\n  module.def(\"create_uv_transport\", &tensorpipe::transport::uv::create);\n\n#if TENSORPIPE_HAS_SHM_TRANSPORT\n  module.def(\"create_shm_transport\", &tensorpipe::transport::shm::create);\n#endif // TENSORPIPE_HAS_SHM_TRANSPORT\n\n  context.def(\n      \"register_transport\",\n      &tensorpipe::Context::registerTransport,\n      py::arg(\"priority\"),\n      py::arg(\"name\"),\n      py::arg(\"transport\"));\n\n  shared_ptr_class_<tensorpipe::channel::Context> abstractChannel(\n      module, \"AbstractChannel\");\n\n  module.def(\"create_basic_channel\", &tensorpipe::channel::basic::create);\n\n#if TENSORPIPE_HAS_CMA_CHANNEL\n  module.def(\"create_cma_channel\", &tensorpipe::channel::cma::create);\n#endif // TENSORPIPE_HAS_CMA_CHANNEL\n\n  context.def(\n      \"register_channel\",\n      &tensorpipe::Context::registerChannel,\n      py::arg(\"priority\"),\n      py::arg(\"name\"),\n      py::arg(\"channel\"));\n\n  // Helpers\n\n  listener.def(\"get_url\", &tensorpipe::Listener::url, py::arg(\"transport\"));\n}\n"
  },
  {
    "path": "tensorpipe/tensorpipe.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/config.h>\n\n// High-level API\n\n#include <tensorpipe/core/context.h>\n#include <tensorpipe/core/error.h>\n#include <tensorpipe/core/listener.h>\n#include <tensorpipe/core/message.h>\n#include <tensorpipe/core/pipe.h>\n\n#include <tensorpipe/common/buffer.h>\n\n#include <tensorpipe/common/cpu_buffer.h>\n\n// Transports\n\n#include <tensorpipe/transport/context.h>\n#include <tensorpipe/transport/error.h>\n\n#include <tensorpipe/transport/uv/error.h>\n#include <tensorpipe/transport/uv/factory.h>\n#include <tensorpipe/transport/uv/utility.h>\n\n#if TENSORPIPE_HAS_SHM_TRANSPORT\n#include <tensorpipe/transport/shm/factory.h>\n#endif // TENSORPIPE_HAS_SHM_TRANSPORT\n\n#if TENSORPIPE_HAS_IBV_TRANSPORT\n#include <tensorpipe/transport/ibv/error.h>\n#include <tensorpipe/transport/ibv/factory.h>\n#include <tensorpipe/transport/ibv/utility.h>\n#endif // TENSORPIPE_HAS_IBV_TRANSPORT\n\n// Channels\n\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/channel/error.h>\n\n#include <tensorpipe/channel/basic/factory.h>\n#include <tensorpipe/channel/mpt/factory.h>\n#include <tensorpipe/channel/xth/factory.h>\n\n#if TENSORPIPE_HAS_CMA_CHANNEL\n#include <tensorpipe/channel/cma/factory.h>\n#endif // TENSORPIPE_HAS_CMA_CHANNEL\n"
  },
  {
    "path": "tensorpipe/tensorpipe_cuda.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/config_cuda.h>\n\n// High-level API\n\n#include <tensorpipe/common/cuda_buffer.h>\n\n// Channels\n\n#include <tensorpipe/channel/cuda_basic/factory.h>\n#include <tensorpipe/channel/cuda_xth/factory.h>\n\n#if TENSORPIPE_HAS_CUDA_GDR_CHANNEL\n#include <tensorpipe/channel/cuda_gdr/factory.h>\n#endif // TENSORPIPE_HAS_CUDA_GDR_CHANNEL\n\n#if TENSORPIPE_HAS_CUDA_IPC_CHANNEL\n#include <tensorpipe/channel/cuda_ipc/factory.h>\n#endif // TENSORPIPE_HAS_CUDA_IPC_CHANNEL\n"
  },
  {
    "path": "tensorpipe/test/CMakeLists.txt",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\n# List of source files that we need to build tensorpipe_test executable.\nset(TP_TEST_SRCS)\n\n# TP_TEST_LINK_LIBRARIES is list of dependent libraries to be linked\nset(TP_TEST_LINK_LIBRARIES)\n\n# TP_TEST_INCLUDE_DIRS is list of include path to be used\nset(TP_TEST_INCLUDE_DIRS)\n\n# TP_TEST_COMPILE_DEFS is list of compile definitions to be used\nset(TP_TEST_COMPILE_DEFS)\n\nlist(APPEND TP_TEST_SRCS\n  test.cc\n  test_environment.cc\n  transport/context_test.cc\n  transport/connection_test.cc\n  transport/uv/uv_test.cc\n  transport/uv/context_test.cc\n  transport/uv/loop_test.cc\n  transport/uv/connection_test.cc\n  transport/uv/sockaddr_test.cc\n  transport/listener_test.cc\n  core/context_test.cc\n  core/pipe_test.cc\n  channel/basic/basic_test.cc\n  channel/xth/xth_test.cc\n  channel/mpt/mpt_test.cc\n  channel/channel_test.cc\n  channel/channel_test_cpu.cc\n  common/system_test.cc\n  common/defs_test.cc\n  )\n\nif(TP_ENABLE_SHM)\n  list(APPEND TP_TEST_SRCS\n    common/epoll_loop_test.cc\n    common/ringbuffer_test.cc\n    common/shm_ringbuffer_test.cc\n    common/shm_segment_test.cc\n    transport/shm/reactor_test.cc\n    transport/shm/connection_test.cc\n    transport/shm/listener_test.cc\n    transport/shm/sockaddr_test.cc\n    transport/shm/shm_test.cc\n    )\nendif()\n\nif(TP_ENABLE_IBV)\n  list(APPEND TP_TEST_SRCS\n    common/epoll_loop_test.cc\n    common/ringbuffer_test.cc\n    transport/ibv/connection_test.cc\n    transport/ibv/ibv_test.cc\n    transport/ibv/sockaddr_test.cc\n    )\nendif()\n\nif(TP_ENABLE_CMA)\n  list(APPEND TP_TEST_SRCS\n    channel/cma/cma_test.cc\n    )\n  add_subdirectory(channel/cma)\nendif()\n\nif(TP_USE_CUDA)\n  find_package(CUDA REQUIRED)\n  list(APPEND TP_TEST_LINK_LIBRARIES ${CUDA_LIBRARIES})\n  
list(APPEND TP_TEST_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})\n  list(APPEND TP_TEST_COMPILE_DEFS TP_USE_CUDA)\n\n  list(APPEND TP_TEST_SRCS\n    channel/channel_test_cuda.cc\n    channel/channel_test_cuda_multi_gpu.cc\n    channel/channel_test_cuda_xdtt.cc\n    common/cuda_test.cc\n    core/pipe_cuda_test.cc\n    )\n\n  list(APPEND TP_TEST_SRCS\n    channel/cuda_xth/cuda_xth_test.cc\n    channel/cuda_basic/cuda_basic_test.cc\n    )\n\n  if(TP_ENABLE_CUDA_IPC)\n    list(APPEND TP_TEST_SRCS\n      channel/cuda_ipc/cuda_ipc_test.cc\n      )\n  endif()\n\n  list(APPEND TP_TEST_SRCS\n    channel/cuda_gdr/cuda_gdr_test.cc\n    )\n\n  cuda_add_library(tensorpipe_cuda_kernel channel/kernel.cu)\n  list(APPEND TP_TEST_LINK_LIBRARIES tensorpipe_cuda_kernel)\n\n  list(APPEND TP_TEST_LINK_LIBRARIES tensorpipe_cuda)\nendif()\n\nadd_subdirectory(${PROJECT_SOURCE_DIR}/third_party/googletest\n  ${PROJECT_BINARY_DIR}/third_party/googletest EXCLUDE_FROM_ALL)\n\nlist(APPEND TP_TEST_LINK_LIBRARIES\n  tensorpipe\n  uv::uv\n  gmock\n  gtest_main)\n\nadd_executable(tensorpipe_test ${TP_TEST_SRCS})\n\n# Add all the dependent link libraries to the tensorpipe_test target\ntarget_link_libraries(tensorpipe_test PRIVATE ${TP_TEST_LINK_LIBRARIES})\ntarget_include_directories(tensorpipe_test PUBLIC ${TP_TEST_INCLUDE_DIRS})\ntarget_compile_definitions(tensorpipe_test PRIVATE ${TP_TEST_COMPILE_DEFS})\n"
  },
  {
    "path": "tensorpipe/test/channel/basic/basic_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/basic/factory.h>\n#include <tensorpipe/test/channel/channel_test_cpu.h>\n\nnamespace {\n\nclass BasicChannelTestHelper : public CpuChannelTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) override {\n    auto context = tensorpipe::channel::basic::create();\n    context->setId(std::move(id));\n    return context;\n  }\n};\n\nBasicChannelTestHelper helper;\n\n} // namespace\n\nINSTANTIATE_TEST_CASE_P(Basic, ChannelTestSuite, ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(Basic, CpuChannelTestSuite, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/channel/channel_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/channel/channel_test.h>\n\n#include <numeric>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::channel;\n\n// Implement this in a subprocess as in some cases it may initialize CUDA and\n// thus would otherwise \"pollute\" the parent process.\nclass DeviceDescriptorsTest : public ChannelTestCase {\n public:\n  void run(ChannelTestHelper* helper) override {\n    auto peerGroup = helper->makePeerGroup();\n    peerGroup->spawn(\n        [&] {\n          std::shared_ptr<Context> context1 = helper->makeContext(\"ctx1\");\n          std::shared_ptr<Context> context2 = helper->makeContext(\"ctx2\");\n          const auto& descriptors1 = context1->deviceDescriptors();\n          const auto& descriptors2 = context2->deviceDescriptors();\n\n          EXPECT_FALSE(descriptors1.empty());\n          EXPECT_FALSE(descriptors2.empty());\n\n          EXPECT_EQ(descriptors1.size(), descriptors2.size());\n          for (const auto& deviceIter : descriptors1) {\n            EXPECT_FALSE(deviceIter.second.empty());\n            EXPECT_EQ(descriptors2.count(deviceIter.first), 1);\n            EXPECT_EQ(deviceIter.second, descriptors2.at(deviceIter.first));\n          }\n        },\n        [] {});\n  }\n};\n\nCHANNEL_TEST(ChannelTestSuite, DeviceDescriptors);\n\nclass ClientToServerTest : public ClientServerChannelTestCase {\n public:\n  static constexpr int kDataSize = 256;\n\n  void server(std::shared_ptr<Channel> channel) override {\n    // Initialize with sequential values.\n    std::vector<uint8_t> data(kDataSize);\n    std::iota(data.begin(), data.end(), 0);\n    std::unique_ptr<DataWrapper> wrappedData = helper_->makeDataWrapper(data);\n\n    // Perform send and wait for completion.\n    
std::future<Error> sendFuture = sendWithFuture(channel, *wrappedData);\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    std::unique_ptr<DataWrapper> wrappedData =\n        helper_->makeDataWrapper(kDataSize);\n\n    // Perform recv and wait for completion.\n    std::future<Error> recvFuture = recvWithFuture(channel, *wrappedData);\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    // Validate contents of vector.\n    auto unwrappedData = wrappedData->unwrap();\n    for (auto i = 0; i < kDataSize; i++) {\n      EXPECT_EQ(unwrappedData[i], i);\n    }\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(ChannelTestSuite, ClientToServer);\n\nclass ServerToClientTest : public ClientServerChannelTestCase {\n  static constexpr int kDataSize = 256;\n\n public:\n  void server(std::shared_ptr<Channel> channel) override {\n    std::unique_ptr<DataWrapper> wrappedData =\n        helper_->makeDataWrapper(kDataSize);\n\n    // Perform recv and wait for completion.\n    std::future<Error> recvFuture = recvWithFuture(channel, *wrappedData);\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    // Validate contents of vector.\n    auto unwrappedData = wrappedData->unwrap();\n    for (auto i = 0; i < kDataSize; i++) {\n      EXPECT_EQ(unwrappedData[i], i);\n    }\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    // Initialize with sequential values.\n    std::vector<uint8_t> data(kDataSize);\n    std::iota(data.begin(), data.end(), 0);\n    std::unique_ptr<DataWrapper> wrappedData = 
helper_->makeDataWrapper(data);\n\n    // Perform send and wait for completion.\n    std::future<Error> sendFuture = sendWithFuture(channel, *wrappedData);\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(ChannelTestSuite, ServerToClient);\n\nclass SendMultipleTensorsTest : public ClientServerChannelTestCase {\n  // FIXME This is very puzzling, as in CircleCI making this field static (and\n  // possibly even constexpr) causes an undefined symbol link error.\n  const int dataSize_ = 256 * 1024; // 256KB\n  static constexpr int kNumTensors = 100;\n\n public:\n  void server(std::shared_ptr<Channel> channel) override {\n    // Initialize with sequential values.\n    std::vector<uint8_t> data(dataSize_);\n    std::iota(data.begin(), data.end(), 0);\n    std::unique_ptr<DataWrapper> wrappedData = helper_->makeDataWrapper(data);\n\n    // Error futures\n    std::vector<std::future<Error>> sendFutures;\n\n    // Perform send and wait for completion.\n    for (int i = 0; i < kNumTensors; i++) {\n      std::future<Error> sendFuture = sendWithFuture(channel, *wrappedData);\n      sendFutures.push_back(std::move(sendFuture));\n    }\n    for (auto& sendFuture : sendFutures) {\n      Error sendError = sendFuture.get();\n      EXPECT_FALSE(sendError) << sendError.what();\n    }\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    std::vector<std::unique_ptr<DataWrapper>> wrappedDataVec;\n    for (int i = 0; i < kNumTensors; i++) {\n      wrappedDataVec.push_back(helper_->makeDataWrapper(dataSize_));\n    }\n\n    // Error futures\n    std::vector<std::future<Error>> recvFutures;\n\n    // Perform recv and wait for completion.\n    for (auto& wrappedData : wrappedDataVec) {\n      std::future<Error> recvFuture = 
recvWithFuture(channel, *wrappedData);\n      recvFutures.push_back(std::move(recvFuture));\n    }\n    for (auto& recvFuture : recvFutures) {\n      Error recvError = recvFuture.get();\n      EXPECT_FALSE(recvError) << recvError.what();\n    }\n\n    // Validate contents of vector.\n    for (auto& wrappedData : wrappedDataVec) {\n      auto unwrappedData = wrappedData->unwrap();\n      for (int i = 0; i < dataSize_; i++) {\n        EXPECT_EQ(unwrappedData[i], i % 256);\n      }\n    }\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(ChannelTestSuite, SendMultipleTensors);\n\nclass SendTensorsBothWaysTest : public ClientServerChannelTestCase {\n  static constexpr int kDataSize = 256;\n\n  void server(std::shared_ptr<Channel> channel) override {\n    // Initialize sendBuffer with sequential values.\n    std::vector<uint8_t> sendData(kDataSize);\n    std::iota(sendData.begin(), sendData.end(), 0);\n    std::unique_ptr<DataWrapper> wrappedSendData =\n        helper_->makeDataWrapper(sendData);\n\n    // Recv buffer.\n    std::unique_ptr<DataWrapper> wrappedRecvData =\n        helper_->makeDataWrapper(kDataSize);\n\n    // Perform send.\n    std::future<Error> sendFuture = sendWithFuture(channel, *wrappedSendData);\n    // Perform recv.\n    std::future<Error> recvFuture = recvWithFuture(channel, *wrappedRecvData);\n\n    // Wait for completion of both.\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    // Verify recvd buffers.\n    auto unwrappedData = wrappedRecvData->unwrap();\n    for (int i = 0; i < kDataSize; i++) {\n      EXPECT_EQ(unwrappedData[i], i % 256);\n    }\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    // Initialize sendBuffer with 
sequential values.\n    std::vector<uint8_t> sendData(kDataSize);\n    std::iota(sendData.begin(), sendData.end(), 0);\n    std::unique_ptr<DataWrapper> wrappedSendData =\n        helper_->makeDataWrapper(sendData);\n\n    // Recv buffer.\n    std::unique_ptr<DataWrapper> wrappedRecvData =\n        helper_->makeDataWrapper(kDataSize);\n\n    // Perform send.\n    std::future<Error> sendFuture = sendWithFuture(channel, *wrappedSendData);\n    // Perform recv.\n    std::future<Error> recvFuture = recvWithFuture(channel, *wrappedRecvData);\n\n    // Wait for completion of both.\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    // Verify recvd buffers.\n    auto unwrappedData = wrappedRecvData->unwrap();\n    for (int i = 0; i < kDataSize; i++) {\n      EXPECT_EQ(unwrappedData[i], i % 256);\n    }\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(ChannelTestSuite, SendTensorsBothWays);\n\n// Call send and recv with a length of 0 but a non-null pointer.\nclass EmptyTensorTest : public ClientServerChannelTestCase {\n  void server(std::shared_ptr<Channel> channel) override {\n    // Allocate a non-empty vector so that its .data() pointer is non-null.\n    std::vector<uint8_t> data(1);\n    std::unique_ptr<DataWrapper> wrappedData = helper_->makeDataWrapper(data);\n    Buffer buffer = wrappedData->buffer();\n\n    // Perform send and wait for completion.\n    std::future<Error> sendFuture = sendWithFuture(channel, buffer, 0);\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    // Allocate a non-empty vector so that its .data() pointer is non-null.\n    
std::unique_ptr<DataWrapper> wrappedData = helper_->makeDataWrapper(1);\n    Buffer buffer = wrappedData->buffer();\n\n    // Perform recv and wait for completion.\n    std::future<Error> recvFuture = recvWithFuture(channel, buffer, 0);\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(ChannelTestSuite, EmptyTensor);\n\n// Call send and recv with a length of 0, between sends and recvs with\n// positive length.\nclass EmptyAndNonEmptyTensorsTest : public ClientServerChannelTestCase {\n  void server(std::shared_ptr<Channel> channel) override {\n    std::vector<uint8_t> data(1);\n    std::unique_ptr<DataWrapper> wrappedData = helper_->makeDataWrapper(data);\n    Buffer buffer = wrappedData->buffer();\n\n    std::vector<std::future<Error>> sendFutures;\n    sendFutures.push_back(sendWithFuture(channel, buffer, 1));\n    sendFutures.push_back(sendWithFuture(channel, buffer, 0));\n    sendFutures.push_back(sendWithFuture(channel, buffer, 1));\n\n    for (auto& f : sendFutures) {\n      Error sendError = f.get();\n      EXPECT_FALSE(sendError) << sendError.what();\n    }\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    std::unique_ptr<DataWrapper> wrappedData = helper_->makeDataWrapper(1);\n    Buffer buffer = wrappedData->buffer();\n\n    std::vector<std::future<Error>> sendFutures;\n    sendFutures.push_back(recvWithFuture(channel, buffer, 1));\n    sendFutures.push_back(recvWithFuture(channel, buffer, 0));\n    sendFutures.push_back(recvWithFuture(channel, buffer, 1));\n\n    for (auto& f : sendFutures) {\n      Error sendError = f.get();\n      EXPECT_FALSE(sendError) << sendError.what();\n    }\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  
}\n};\n\nCHANNEL_TEST(ChannelTestSuite, EmptyAndNonEmptyTensors);\n"
  },
  {
    "path": "tensorpipe/test/channel/channel_test.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <future>\n#include <memory>\n#include <string>\n#include <thread>\n#include <tuple>\n#include <utility>\n#include <vector>\n\n#include <gtest/gtest.h>\n\n#include <tensorpipe/channel/channel.h>\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/common/buffer.h>\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/test/peer_group.h>\n#include <tensorpipe/transport/connection.h>\n#include <tensorpipe/transport/listener.h>\n#include <tensorpipe/transport/uv/factory.h>\n\nclass DataWrapper {\n public:\n  virtual tensorpipe::Buffer buffer() const = 0;\n\n  virtual size_t bufferLength() const = 0;\n\n  virtual std::vector<uint8_t> unwrap() = 0;\n\n  virtual ~DataWrapper() = default;\n};\n\nclass ChannelTestHelper {\n public:\n  virtual ~ChannelTestHelper() = default;\n\n  std::shared_ptr<tensorpipe::channel::Context> makeContext(\n      std::string id,\n      bool skipViabilityCheck = false) {\n    std::shared_ptr<tensorpipe::channel::Context> ctx =\n        makeContextInternal(std::move(id));\n    if (!skipViabilityCheck) {\n      EXPECT_TRUE(ctx->isViable());\n    }\n    return ctx;\n  }\n\n  virtual std::shared_ptr<PeerGroup> makePeerGroup() {\n    return std::make_shared<ThreadPeerGroup>();\n  }\n\n  virtual std::unique_ptr<DataWrapper> makeDataWrapper(size_t length) = 0;\n  virtual std::unique_ptr<DataWrapper> makeDataWrapper(\n      std::vector<uint8_t> v) = 0;\n\n protected:\n  virtual std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) = 0;\n};\n\n[[nodiscard]] inline std::future<tensorpipe::Error> sendWithFuture(\n    std::shared_ptr<tensorpipe::channel::Channel> channel,\n    tensorpipe::Buffer buffer,\n    size_t length) {\n  auto 
promise = std::make_shared<std::promise<tensorpipe::Error>>();\n  auto future = promise->get_future();\n\n  channel->send(\n      buffer,\n      length,\n      [promise{std::move(promise)}](const tensorpipe::Error& error) {\n        promise->set_value(error);\n      });\n  return future;\n}\n\n[[nodiscard]] inline std::future<tensorpipe::Error> sendWithFuture(\n    std::shared_ptr<tensorpipe::channel::Channel> channel,\n    const DataWrapper& dataWrapper) {\n  return sendWithFuture(\n      std::move(channel), dataWrapper.buffer(), dataWrapper.bufferLength());\n}\n\n[[nodiscard]] inline std::future<tensorpipe::Error> recvWithFuture(\n    std::shared_ptr<tensorpipe::channel::Channel> channel,\n    tensorpipe::Buffer buffer,\n    size_t length) {\n  auto promise = std::make_shared<std::promise<tensorpipe::Error>>();\n  auto future = promise->get_future();\n\n  channel->recv(\n      buffer,\n      length,\n      [promise{std::move(promise)}](const tensorpipe::Error& error) {\n        promise->set_value(error);\n      });\n  return future;\n}\n\n[[nodiscard]] inline std::future<tensorpipe::Error> recvWithFuture(\n    std::shared_ptr<tensorpipe::channel::Channel> channel,\n    const DataWrapper& dataWrapper) {\n  return recvWithFuture(\n      std::move(channel), dataWrapper.buffer(), dataWrapper.bufferLength());\n}\n\nclass ChannelTestCase {\n public:\n  virtual void run(ChannelTestHelper* helper) = 0;\n\n  virtual ~ChannelTestCase() = default;\n};\n\nclass ClientServerChannelTestCase : public ChannelTestCase {\n  using MultiAcceptResult = std::pair<\n      tensorpipe::Error,\n      std::vector<std::shared_ptr<tensorpipe::transport::Connection>>>;\n\n  class MultiAcceptResultPromise {\n   public:\n    explicit MultiAcceptResultPromise(size_t numConnections)\n        : connections_(numConnections) {}\n\n    ~MultiAcceptResultPromise() {\n      // Sanity check\n      if (!error_) {\n        for (const auto& conn : connections_) {\n          EXPECT_NE(conn, nullptr);\n      
  }\n      }\n      promise_.set_value(\n          MultiAcceptResult(std::move(error_), std::move(connections_)));\n    }\n\n    std::future<MultiAcceptResult> getFuture() {\n      return promise_.get_future();\n    }\n\n    void setConnection(\n        size_t connId,\n        std::shared_ptr<tensorpipe::transport::Connection> connection) {\n      EXPECT_LT(connId, connections_.size());\n      connections_[connId] = std::move(connection);\n    }\n\n    void setError(tensorpipe::Error error) {\n      std::unique_lock<std::mutex> lock(errorMutex_);\n      if (error_) {\n        return;\n      }\n      error_ = std::move(error);\n    }\n\n   private:\n    tensorpipe::Error error_{tensorpipe::Error::kSuccess};\n    std::mutex errorMutex_;\n    std::vector<std::shared_ptr<tensorpipe::transport::Connection>>\n        connections_;\n    std::promise<MultiAcceptResult> promise_;\n  };\n\n  std::future<MultiAcceptResult> accept(\n      tensorpipe::transport::Listener& listener,\n      size_t numConnections) {\n    auto promise = std::make_shared<MultiAcceptResultPromise>(numConnections);\n    for (size_t i = 0; i < numConnections; ++i) {\n      listener.accept(\n          [promise](\n              const tensorpipe::Error& error,\n              std::shared_ptr<tensorpipe::transport::Connection> connection) {\n            if (error) {\n              promise->setError(std::move(error));\n              return;\n            }\n\n            connection->read([promise, connection](\n                                 const tensorpipe::Error& error,\n                                 const void* connIdBuf,\n                                 size_t length) mutable {\n              if (error) {\n                promise->setError(std::move(error));\n                return;\n              }\n              ASSERT_EQ(sizeof(uint64_t), length);\n              uint64_t connId = *static_cast<const uint64_t*>(connIdBuf);\n              promise->setConnection(connId, std::move(connection));\n     
       });\n          });\n    }\n\n    return promise->getFuture();\n  }\n\n  std::vector<std::shared_ptr<tensorpipe::transport::Connection>> connect(\n      std::shared_ptr<tensorpipe::transport::Context> transportCtx,\n      std::string addr,\n      size_t numConnections) {\n    std::vector<std::shared_ptr<tensorpipe::transport::Connection>> connections(\n        numConnections);\n    for (size_t connId = 0; connId < numConnections; ++connId) {\n      connections[connId] = transportCtx->connect(addr);\n      auto connIdBuf = std::make_shared<uint64_t>(connId);\n      connections[connId]->write(\n          connIdBuf.get(),\n          sizeof(uint64_t),\n          [connIdBuf](const tensorpipe::Error& error) {\n            EXPECT_FALSE(error) << error.what();\n          });\n    }\n\n    return connections;\n  }\n\n public:\n  void run(ChannelTestHelper* helper) override {\n    auto addr = \"127.0.0.1\";\n\n    helper_ = helper;\n    peers_ = helper_->makePeerGroup();\n    peers_->spawn(\n        [&] {\n          auto transportCtx = tensorpipe::transport::uv::create();\n          transportCtx->setId(\"server_harness\");\n          auto ctx = helper_->makeContext(\"server\");\n\n          auto listener = transportCtx->listen(addr);\n\n          auto connectionsFuture =\n              accept(*listener, ctx->numConnectionsNeeded());\n          peers_->send(PeerGroup::kClient, listener->addr());\n\n          tensorpipe::Error connectionsError;\n          std::vector<std::shared_ptr<tensorpipe::transport::Connection>>\n              connections;\n          std::tie(connectionsError, connections) = connectionsFuture.get();\n          EXPECT_FALSE(connectionsError) << connectionsError.what();\n\n          auto channel = ctx->createChannel(\n              std::move(connections), tensorpipe::channel::Endpoint::kListen);\n\n          server(std::move(channel));\n\n          ctx->join();\n          transportCtx->join();\n\n          afterServer();\n        },\n        [&] {\n  
        auto transportCtx = tensorpipe::transport::uv::create();\n          transportCtx->setId(\"client_harness\");\n          auto ctx = helper_->makeContext(\"client\");\n\n          auto laddr = peers_->recv(PeerGroup::kClient);\n\n          auto connections =\n              connect(transportCtx, laddr, ctx->numConnectionsNeeded());\n\n          auto channel = ctx->createChannel(\n              std::move(connections), tensorpipe::channel::Endpoint::kConnect);\n\n          client(std::move(channel));\n\n          ctx->join();\n          transportCtx->join();\n\n          afterClient();\n        });\n  }\n\n  virtual void server(\n      std::shared_ptr<tensorpipe::channel::Channel> /* channel */) {}\n  virtual void client(\n      std::shared_ptr<tensorpipe::channel::Channel> /* channel */) {}\n\n  virtual void afterServer() {}\n  virtual void afterClient() {}\n\n protected:\n  ChannelTestHelper* helper_;\n  std::shared_ptr<PeerGroup> peers_;\n};\n\nclass ChannelTestSuite : public ::testing::TestWithParam<ChannelTestHelper*> {};\n\n// Register a channel test.\n#define CHANNEL_TEST(suite, name) \\\n  TEST_P(suite, name) {           \\\n    name##Test t;                 \\\n    t.run(GetParam());            \\\n  }\n"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cpu.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/channel/channel_test_cpu.h>\n\n#include <numeric>\n\n#include <tensorpipe/test/channel/channel_test.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::channel;\n\n// Call send and recv with a null pointer and a length of 0.\nclass NullPointerTest : public ClientServerChannelTestCase {\n  void server(std::shared_ptr<Channel> channel) override {\n    // Perform send and wait for completion.\n    std::future<Error> sendFuture =\n        sendWithFuture(channel, CpuBuffer{.ptr = nullptr}, 0);\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    // Perform recv and wait for completion.\n    std::future<Error> recvFuture =\n        recvWithFuture(channel, CpuBuffer{.ptr = nullptr}, 0);\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(CpuChannelTestSuite, NullPointer);\n\n// This test wants to make sure that the \"heavy lifting\" of copying data isn't\n// performed inline inside the recv method as that would make the user-facing\n// read method of the pipe blocking.\n// However, since we can't really check that behavior, we'll check a highly\n// correlated one: that the recv callback isn't called inline from within the\n// recv method. 
We do so by having that behavior cause a deadlock.\nclass CallbacksAreDeferredTest : public ClientServerChannelTestCase {\n  static constexpr auto kDataSize = 256;\n\n public:\n  void server(std::shared_ptr<Channel> channel) override {\n    // Initialize with sequential values.\n    std::vector<uint8_t> data(kDataSize);\n    std::iota(data.begin(), data.end(), 0);\n\n    // Perform send and wait for completion.\n    std::promise<Error> sendPromise;\n    auto mutex = std::make_shared<std::mutex>();\n    std::unique_lock<std::mutex> callerLock(*mutex);\n    channel->send(\n        CpuBuffer{.ptr = data.data()},\n        kDataSize,\n        [&sendPromise, mutex](const Error& error) {\n          std::unique_lock<std::mutex> calleeLock(*mutex);\n          sendPromise.set_value(error);\n        });\n    callerLock.unlock();\n    Error sendError = sendPromise.get_future().get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    // Initialize with zeroes.\n    std::vector<uint8_t> data(kDataSize);\n    std::fill(data.begin(), data.end(), 0);\n\n    // Perform recv and wait for completion.\n    std::promise<Error> recvPromise;\n    std::mutex mutex;\n    std::unique_lock<std::mutex> callerLock(mutex);\n    channel->recv(\n        CpuBuffer{.ptr = data.data()},\n        kDataSize,\n        [&recvPromise, &mutex](const Error& error) {\n          std::unique_lock<std::mutex> calleeLock(mutex);\n          recvPromise.set_value(error);\n        });\n    callerLock.unlock();\n    Error recvError = recvPromise.get_future().get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    // Validate contents of vector.\n    for (auto i = 0; i < kDataSize; i++) {\n      EXPECT_EQ(data[i], i);\n    }\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  
}\n};\n\nCHANNEL_TEST(CpuChannelTestSuite, CallbacksAreDeferred);\n"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cpu.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/test/channel/channel_test.h>\n\nclass CpuDataWrapper : public DataWrapper {\n public:\n  explicit CpuDataWrapper(size_t length) : vector_(length) {}\n\n  explicit CpuDataWrapper(std::vector<uint8_t> v) : vector_(v) {}\n\n  tensorpipe::Buffer buffer() const override {\n    return tensorpipe::CpuBuffer{.ptr = const_cast<uint8_t*>(vector_.data())};\n  }\n\n  size_t bufferLength() const override {\n    return vector_.size();\n  }\n\n  std::vector<uint8_t> unwrap() override {\n    return vector_;\n  }\n\n private:\n  std::vector<uint8_t> vector_;\n};\n\nclass CpuChannelTestHelper : public ChannelTestHelper {\n public:\n  std::unique_ptr<DataWrapper> makeDataWrapper(size_t length) override {\n    return std::make_unique<CpuDataWrapper>(length);\n  }\n\n  std::unique_ptr<DataWrapper> makeDataWrapper(\n      std::vector<uint8_t> v) override {\n    return std::make_unique<CpuDataWrapper>(std::move(v));\n  }\n};\n\nclass CpuChannelTestSuite\n    : public ::testing::TestWithParam<CpuChannelTestHelper*> {};\n"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cuda.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/channel/channel_test_cuda.h>\n\n#include <cuda_runtime.h>\n#include <gmock/gmock.h>\n\n#include <tensorpipe/test/channel/kernel.cuh>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::channel;\n\nclass ReceiverWaitsForStartEventTest : public ClientServerChannelTestCase {\n  static constexpr size_t kSize = 1024;\n\n  void server(std::shared_ptr<Channel> channel) override {\n    TP_CUDA_CHECK(cudaSetDevice(0));\n    cudaStream_t sendStream;\n    TP_CUDA_CHECK(\n        cudaStreamCreateWithFlags(&sendStream, cudaStreamNonBlocking));\n    void* ptr;\n    TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n\n    // Delay sendStream with computations on buffer.\n    slowKernel(ptr, kSize, sendStream);\n\n    // Set buffer to target value.\n    TP_CUDA_CHECK(cudaMemsetAsync(ptr, 0x42, kSize, sendStream));\n\n    // Perform send and wait for completion.\n    auto sendPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto sendFuture = sendPromise->get_future();\n\n    channel->send(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = sendStream,\n        },\n        kSize,\n        [sendPromise{std::move(sendPromise)}](const tensorpipe::Error& error) {\n          sendPromise->set_value(error);\n        });\n\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n    TP_CUDA_CHECK(cudaFree(ptr));\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    TP_CUDA_CHECK(cudaSetDevice(0));\n    cudaStream_t recvStream;\n    TP_CUDA_CHECK(\n        cudaStreamCreateWithFlags(&recvStream, cudaStreamNonBlocking));\n    void* ptr;\n    
TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n\n    // Perform recv and wait for completion.\n    auto recvPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto recvFuture = recvPromise->get_future();\n\n    channel->recv(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = recvStream,\n        },\n        kSize,\n        [recvPromise{std::move(recvPromise)}](const tensorpipe::Error& error) {\n          recvPromise->set_value(error);\n        });\n\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    std::array<uint8_t, kSize> data;\n    TP_CUDA_CHECK(cudaStreamSynchronize(recvStream));\n    TP_CUDA_CHECK(cudaMemcpy(data.data(), ptr, kSize, cudaMemcpyDefault));\n    EXPECT_THAT(data, ::testing::Each(0x42));\n    TP_CUDA_CHECK(cudaFree(ptr));\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(CudaChannelTestSuite, ReceiverWaitsForStartEvent);\n\nclass SendOffsetAllocationTest : public ClientServerChannelTestCase {\n public:\n  static constexpr int kDataSize = 256;\n  static constexpr int kOffset = 128;\n\n  void server(std::shared_ptr<Channel> channel) override {\n    // Initialize with sequential values.\n    void* ptr;\n    TP_CUDA_CHECK(cudaMalloc(&ptr, kOffset + kDataSize));\n    // Set buffer to target value.\n    TP_CUDA_CHECK(cudaMemset(ptr, 0xff, kOffset));\n    TP_CUDA_CHECK(\n        cudaMemset(static_cast<uint8_t*>(ptr) + kOffset, 0x42, kDataSize));\n\n    // Perform send and wait for completion.\n    std::future<Error> sendFuture = sendWithFuture(\n        channel,\n        CudaBuffer{.ptr = static_cast<uint8_t*>(ptr) + kOffset},\n        kDataSize);\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    
std::unique_ptr<DataWrapper> wrappedData =\n        helper_->makeDataWrapper(kDataSize);\n\n    // Perform recv and wait for completion.\n    std::future<Error> recvFuture = recvWithFuture(channel, *wrappedData);\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    // Validate contents of vector.\n    EXPECT_THAT(wrappedData->unwrap(), ::testing::Each(0x42));\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(CudaChannelTestSuite, SendOffsetAllocation);\n"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cuda.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <utility>\n#include <vector>\n\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_buffer.h>\n#include <tensorpipe/test/channel/channel_test.h>\n\nclass CudaDataWrapper : public DataWrapper {\n public:\n  // Non-copyable.\n  CudaDataWrapper(const CudaDataWrapper&) = delete;\n  CudaDataWrapper& operator=(const CudaDataWrapper&) = delete;\n  // Non-movable.\n  CudaDataWrapper(CudaDataWrapper&& other) = delete;\n  CudaDataWrapper& operator=(CudaDataWrapper&& other) = delete;\n\n  explicit CudaDataWrapper(size_t length) : length_(length) {\n    if (length_ > 0) {\n      TP_CUDA_CHECK(cudaSetDevice(0));\n      TP_CUDA_CHECK(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking));\n      TP_CUDA_CHECK(cudaMalloc(&cudaPtr_, length_));\n    }\n  }\n\n  explicit CudaDataWrapper(std::vector<uint8_t> v) : CudaDataWrapper(v.size()) {\n    if (length_ > 0) {\n      TP_CUDA_CHECK(cudaMemcpyAsync(\n          cudaPtr_, v.data(), length_, cudaMemcpyDefault, stream_));\n    }\n  }\n\n  tensorpipe::Buffer buffer() const override {\n    return tensorpipe::CudaBuffer{\n        .ptr = cudaPtr_,\n        .stream = stream_,\n    };\n  }\n\n  size_t bufferLength() const override {\n    return length_;\n  }\n\n  std::vector<uint8_t> unwrap() override {\n    std::vector<uint8_t> v(length_);\n    if (length_ > 0) {\n      TP_CUDA_CHECK(cudaStreamSynchronize(stream_));\n      TP_CUDA_CHECK(cudaMemcpy(v.data(), cudaPtr_, length_, cudaMemcpyDefault));\n    }\n    return v;\n  }\n\n  ~CudaDataWrapper() override {\n    if (length_ > 0) {\n      TP_CUDA_CHECK(cudaFree(cudaPtr_));\n      TP_CUDA_CHECK(cudaStreamDestroy(stream_));\n    }\n  }\n\n private:\n  void* cudaPtr_{nullptr};\n  size_t 
length_{0};\n  cudaStream_t stream_{cudaStreamDefault};\n};\n\nclass CudaChannelTestHelper : public ChannelTestHelper {\n public:\n  std::unique_ptr<DataWrapper> makeDataWrapper(size_t length) override {\n    return std::make_unique<CudaDataWrapper>(length);\n  }\n\n  std::unique_ptr<DataWrapper> makeDataWrapper(\n      std::vector<uint8_t> v) override {\n    return std::make_unique<CudaDataWrapper>(std::move(v));\n  }\n};\n\nclass CudaChannelTestSuite\n    : public ::testing::TestWithParam<CudaChannelTestHelper*> {};\n\nclass CudaMultiGPUChannelTestSuite\n    : public ::testing::TestWithParam<CudaChannelTestHelper*> {};\n\nclass CudaXDTTChannelTestSuite\n    : public ::testing::TestWithParam<CudaChannelTestHelper*> {};\n"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cuda_multi_gpu.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <cuda_runtime.h>\n#include <gmock/gmock.h>\n\n#include <tensorpipe/test/channel/channel_test_cuda.h>\n#include <tensorpipe/test/channel/cuda_helpers.h>\n#include <tensorpipe/test/test_environment.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::channel;\n\nclass SendAcrossDevicesTest : public ClientServerChannelTestCase {\n  static constexpr size_t kSize = 1024;\n\n public:\n  void run(ChannelTestHelper* helper) override {\n    if (TestEnvironment::numCudaDevices() < 2) {\n      GTEST_SKIP() << \"Skipping test requiring >=2 CUDA devices.\";\n    }\n\n    ClientServerChannelTestCase::run(helper);\n  }\n\n private:\n  void server(std::shared_ptr<Channel> channel) override {\n    cudaStream_t sendStream;\n    void* ptr;\n    {\n      // Send happens from device #0.\n      CudaDeviceGuard guard(0);\n      TP_CUDA_CHECK(\n          cudaStreamCreateWithFlags(&sendStream, cudaStreamNonBlocking));\n      TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n\n      // Set buffer to target value.\n      TP_CUDA_CHECK(cudaMemsetAsync(ptr, 0x42, kSize, sendStream));\n    }\n\n    // Perform send and wait for completion.\n    auto sendPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto sendFuture = sendPromise->get_future();\n\n    channel->send(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = sendStream,\n        },\n        kSize,\n        [sendPromise{std::move(sendPromise)}](const tensorpipe::Error& error) {\n          sendPromise->set_value(error);\n        });\n\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    {\n      CudaDeviceGuard guard(0);\n      TP_CUDA_CHECK(cudaFree(ptr));\n      
TP_CUDA_CHECK(cudaStreamDestroy(sendStream));\n    }\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void afterServer() override {\n    if (this->peers_->endpointsInSameProcess()) {\n      EXPECT_TRUE(initializedCudaContexts({0, 1}));\n    } else {\n      EXPECT_TRUE(initializedCudaContexts({0}));\n    }\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    cudaStream_t recvStream;\n    void* ptr;\n    {\n      // Recv happens on device #1.\n      CudaDeviceGuard guard(1);\n      TP_CUDA_CHECK(\n          cudaStreamCreateWithFlags(&recvStream, cudaStreamNonBlocking));\n      TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n    }\n\n    // Perform recv and wait for completion.\n    auto recvPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto recvFuture = recvPromise->get_future();\n\n    channel->recv(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = recvStream,\n        },\n        kSize,\n        [recvPromise{std::move(recvPromise)}](const tensorpipe::Error& error) {\n          recvPromise->set_value(error);\n        });\n\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    {\n      CudaDeviceGuard guard(1);\n      std::array<uint8_t, kSize> data;\n      TP_CUDA_CHECK(cudaStreamSynchronize(recvStream));\n      TP_CUDA_CHECK(cudaMemcpy(data.data(), ptr, kSize, cudaMemcpyDefault));\n      EXPECT_THAT(data, ::testing::Each(0x42));\n      TP_CUDA_CHECK(cudaFree(ptr));\n      TP_CUDA_CHECK(cudaStreamDestroy(recvStream));\n    }\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n\n  void afterClient() override {\n    if (this->peers_->endpointsInSameProcess()) {\n      EXPECT_TRUE(initializedCudaContexts({0, 1}));\n    } else {\n      EXPECT_TRUE(initializedCudaContexts({1}));\n    }\n  }\n};\n\nCHANNEL_TEST(CudaMultiGPUChannelTestSuite, SendAcrossDevices);\n\nclass 
SendReverseAcrossDevicesTest : public ClientServerChannelTestCase {\n  static constexpr size_t kSize = 1024;\n\n public:\n  void run(ChannelTestHelper* helper) override {\n    if (TestEnvironment::numCudaDevices() < 2) {\n      GTEST_SKIP() << \"Skipping test requiring >=2 CUDA devices.\";\n    }\n\n    ClientServerChannelTestCase::run(helper);\n  }\n\n private:\n  void server(std::shared_ptr<Channel> channel) override {\n    cudaStream_t sendStream;\n    void* ptr;\n    {\n      // Send happens from device #1.\n      CudaDeviceGuard guard(1);\n      TP_CUDA_CHECK(\n          cudaStreamCreateWithFlags(&sendStream, cudaStreamNonBlocking));\n      TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n\n      // Set buffer to target value.\n      TP_CUDA_CHECK(cudaMemsetAsync(ptr, 0x42, kSize, sendStream));\n    }\n\n    // Perform send and wait for completion.\n    auto sendPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto sendFuture = sendPromise->get_future();\n\n    channel->send(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = sendStream,\n        },\n        kSize,\n        [sendPromise{std::move(sendPromise)}](const tensorpipe::Error& error) {\n          sendPromise->set_value(error);\n        });\n\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    {\n      CudaDeviceGuard guard(1);\n      TP_CUDA_CHECK(cudaFree(ptr));\n      TP_CUDA_CHECK(cudaStreamDestroy(sendStream));\n    }\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void afterServer() override {\n    if (this->peers_->endpointsInSameProcess()) {\n      EXPECT_TRUE(initializedCudaContexts({0, 1}));\n    } else {\n      EXPECT_TRUE(initializedCudaContexts({1}));\n    }\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    cudaStream_t recvStream;\n    void* ptr;\n    {\n      // Recv happens on device #0.\n      CudaDeviceGuard guard(0);\n      
TP_CUDA_CHECK(\n          cudaStreamCreateWithFlags(&recvStream, cudaStreamNonBlocking));\n      TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n    }\n\n    // Perform recv and wait for completion.\n    auto recvPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto recvFuture = recvPromise->get_future();\n\n    channel->recv(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = recvStream,\n        },\n        kSize,\n        [recvPromise{std::move(recvPromise)}](const tensorpipe::Error& error) {\n          recvPromise->set_value(error);\n        });\n\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    {\n      CudaDeviceGuard guard(0);\n      std::array<uint8_t, kSize> data;\n      TP_CUDA_CHECK(cudaStreamSynchronize(recvStream));\n      TP_CUDA_CHECK(cudaMemcpy(data.data(), ptr, kSize, cudaMemcpyDefault));\n      EXPECT_THAT(data, ::testing::Each(0x42));\n      TP_CUDA_CHECK(cudaFree(ptr));\n      TP_CUDA_CHECK(cudaStreamDestroy(recvStream));\n    }\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n\n  void afterClient() override {\n    if (this->peers_->endpointsInSameProcess()) {\n      EXPECT_TRUE(initializedCudaContexts({0, 1}));\n    } else {\n      EXPECT_TRUE(initializedCudaContexts({0}));\n    }\n  }\n};\n\nCHANNEL_TEST(CudaMultiGPUChannelTestSuite, SendReverseAcrossDevices);\n\nclass SendAcrossNonDefaultDevicesTest : public ClientServerChannelTestCase {\n  static constexpr size_t kSize = 1024;\n\n public:\n  void run(ChannelTestHelper* helper) override {\n    if (TestEnvironment::numCudaDevices() < 2) {\n      GTEST_SKIP() << \"Skipping test requiring >=2 CUDA devices.\";\n    }\n\n    ClientServerChannelTestCase::run(helper);\n  }\n\n private:\n  void server(std::shared_ptr<Channel> channel) override {\n    cudaStream_t sendStream;\n    void* ptr;\n    {\n      // Send happens from device #1.\n      CudaDeviceGuard guard(1);\n      
TP_CUDA_CHECK(\n          cudaStreamCreateWithFlags(&sendStream, cudaStreamNonBlocking));\n      TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n\n      // Set buffer to target value.\n      TP_CUDA_CHECK(cudaMemsetAsync(ptr, 0x42, kSize, sendStream));\n    }\n\n    // Perform send and wait for completion.\n    auto sendPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto sendFuture = sendPromise->get_future();\n\n    channel->send(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = sendStream,\n        },\n        kSize,\n        [sendPromise{std::move(sendPromise)}](const tensorpipe::Error& error) {\n          sendPromise->set_value(error);\n        });\n\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    {\n      CudaDeviceGuard guard(1);\n      TP_CUDA_CHECK(cudaFree(ptr));\n      TP_CUDA_CHECK(cudaStreamDestroy(sendStream));\n    }\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void afterServer() override {\n    EXPECT_TRUE(initializedCudaContexts({1}));\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    cudaStream_t recvStream;\n    void* ptr;\n    {\n      // Recv happens on device #1.\n      CudaDeviceGuard guard(1);\n      TP_CUDA_CHECK(\n          cudaStreamCreateWithFlags(&recvStream, cudaStreamNonBlocking));\n      TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n    }\n\n    // Perform recv and wait for completion.\n    auto recvPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto recvFuture = recvPromise->get_future();\n\n    channel->recv(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = recvStream,\n        },\n        kSize,\n        [recvPromise{std::move(recvPromise)}](const tensorpipe::Error& error) {\n          recvPromise->set_value(error);\n        });\n\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    {\n      
CudaDeviceGuard guard(1);\n      std::array<uint8_t, kSize> data;\n      TP_CUDA_CHECK(cudaStreamSynchronize(recvStream));\n      TP_CUDA_CHECK(cudaMemcpy(data.data(), ptr, kSize, cudaMemcpyDefault));\n      EXPECT_THAT(data, ::testing::Each(0x42));\n      TP_CUDA_CHECK(cudaFree(ptr));\n      TP_CUDA_CHECK(cudaStreamDestroy(recvStream));\n    }\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n\n  void afterClient() override {\n    EXPECT_TRUE(initializedCudaContexts({1}));\n  }\n};\n\nCHANNEL_TEST(CudaMultiGPUChannelTestSuite, SendAcrossNonDefaultDevices);\n"
  },
  {
    "path": "tensorpipe/test/channel/channel_test_cuda_xdtt.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/channel/channel_test_cuda.h>\n\n#include <cuda_runtime.h>\n#include <gmock/gmock.h>\n\n#include <tensorpipe/test/channel/kernel.cuh>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::channel;\n\nclass SendFromCpuToGpuTest : public ClientServerChannelTestCase {\n  static constexpr size_t kSize = 1024;\n\n  void server(std::shared_ptr<Channel> channel) override {\n    // Perform send and wait for completion.\n    auto sendPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto sendFuture = sendPromise->get_future();\n\n    std::vector<uint8_t> data(kSize, 0x42);\n    channel->send(\n        CpuBuffer{\n            .ptr = data.data(),\n        },\n        kSize,\n        [sendPromise{std::move(sendPromise)}](const tensorpipe::Error& error) {\n          sendPromise->set_value(error);\n        });\n\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    TP_CUDA_CHECK(cudaSetDevice(0));\n    cudaStream_t recvStream;\n    TP_CUDA_CHECK(\n        cudaStreamCreateWithFlags(&recvStream, cudaStreamNonBlocking));\n    void* ptr;\n    TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n\n    // Perform recv and wait for completion.\n    auto recvPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto recvFuture = recvPromise->get_future();\n\n    channel->recv(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = recvStream,\n        },\n        kSize,\n        [recvPromise{std::move(recvPromise)}](const tensorpipe::Error& error) {\n          
recvPromise->set_value(error);\n        });\n\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    std::array<uint8_t, kSize> data;\n    TP_CUDA_CHECK(cudaStreamSynchronize(recvStream));\n    TP_CUDA_CHECK(cudaMemcpy(data.data(), ptr, kSize, cudaMemcpyDefault));\n    EXPECT_THAT(data, ::testing::Each(0x42));\n    TP_CUDA_CHECK(cudaFree(ptr));\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(CudaXDTTChannelTestSuite, SendFromCpuToGpu);\n\nclass SendFromGpuToCpuTest : public ClientServerChannelTestCase {\n  static constexpr size_t kSize = 1024;\n\n  void server(std::shared_ptr<Channel> channel) override {\n    TP_CUDA_CHECK(cudaSetDevice(0));\n    cudaStream_t sendStream;\n    TP_CUDA_CHECK(\n        cudaStreamCreateWithFlags(&sendStream, cudaStreamNonBlocking));\n    void* ptr;\n    TP_CUDA_CHECK(cudaMalloc(&ptr, kSize));\n\n    // Set buffer to target value.\n    TP_CUDA_CHECK(cudaMemsetAsync(ptr, 0x42, kSize, sendStream));\n\n    // Perform send and wait for completion.\n    auto sendPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto sendFuture = sendPromise->get_future();\n\n    channel->send(\n        CudaBuffer{\n            .ptr = ptr,\n            .stream = sendStream,\n        },\n        kSize,\n        [sendPromise{std::move(sendPromise)}](const tensorpipe::Error& error) {\n          sendPromise->set_value(error);\n        });\n\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n    TP_CUDA_CHECK(cudaFree(ptr));\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    // Perform recv and wait for completion.\n    auto recvPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto recvFuture = recvPromise->get_future();\n\n    std::vector<uint8_t> data(kSize);\n    
channel->recv(\n        CpuBuffer{\n            .ptr = data.data(),\n        },\n        kSize,\n        [recvPromise{std::move(recvPromise)}](const tensorpipe::Error& error) {\n          recvPromise->set_value(error);\n        });\n\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    EXPECT_THAT(data, ::testing::Each(0x42));\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(CudaXDTTChannelTestSuite, SendFromGpuToCpu);\n\nclass SendFromCpuToCpuTest : public ClientServerChannelTestCase {\n  static constexpr size_t kSize = 1024;\n\n  void server(std::shared_ptr<Channel> channel) override {\n    // Perform send and wait for completion.\n    auto sendPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto sendFuture = sendPromise->get_future();\n\n    std::vector<uint8_t> data(kSize, 0x42);\n    channel->send(\n        CpuBuffer{\n            .ptr = data.data(),\n        },\n        kSize,\n        [sendPromise{std::move(sendPromise)}](const tensorpipe::Error& error) {\n          sendPromise->set_value(error);\n        });\n\n    Error sendError = sendFuture.get();\n    EXPECT_FALSE(sendError) << sendError.what();\n\n    this->peers_->done(PeerGroup::kServer);\n    this->peers_->join(PeerGroup::kServer);\n  }\n\n  void client(std::shared_ptr<Channel> channel) override {\n    // Perform recv and wait for completion.\n    auto recvPromise = std::make_shared<std::promise<tensorpipe::Error>>();\n    auto recvFuture = recvPromise->get_future();\n\n    std::vector<uint8_t> data(kSize);\n    channel->recv(\n        CpuBuffer{\n            .ptr = data.data(),\n        },\n        kSize,\n        [recvPromise{std::move(recvPromise)}](const tensorpipe::Error& error) {\n          recvPromise->set_value(error);\n        });\n\n    Error recvError = recvFuture.get();\n    EXPECT_FALSE(recvError) << recvError.what();\n\n    EXPECT_THAT(data, 
::testing::Each(0x42));\n\n    this->peers_->done(PeerGroup::kClient);\n    this->peers_->join(PeerGroup::kClient);\n  }\n};\n\nCHANNEL_TEST(CudaXDTTChannelTestSuite, SendFromCpuToCpu);\n"
  },
  {
    "path": "tensorpipe/test/channel/cma/CMakeLists.txt",
    "content": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\nadd_executable(tensorpipe_channel_cma_probe\n  probe.cc\n)\n\ntarget_link_libraries(tensorpipe_channel_cma_probe PRIVATE\n  tensorpipe\n)\n"
  },
  {
    "path": "tensorpipe/test/channel/cma/cma_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/cma/factory.h>\n#include <tensorpipe/test/channel/channel_test_cpu.h>\n\nnamespace {\n\nclass CmaChannelTestHelper : public CpuChannelTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) override {\n    auto context = tensorpipe::channel::cma::create();\n    context->setId(std::move(id));\n    return context;\n  }\n};\n\nCmaChannelTestHelper helper;\n\n} // namespace\n\nINSTANTIATE_TEST_CASE_P(Cma, ChannelTestSuite, ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(Cma, CpuChannelTestSuite, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/channel/cma/docker_tests.sh",
    "content": "#!/usr/bin/env bash\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\n# We use a lot of trailing backslashes inside single-quoted string literals when\n# we pass sub-scripts to sh -c, in order to wrap lines for long commands.\n# Removing them would be incorrect, hence we just silence the linter warning.\n# shellcheck disable=SC1004\n\nset -eo pipefail\n\n\necho \"Both endpoints in same vanilla container\"\n# This is not supposed to work, as Docker by default has a seccomp-bpf rule that\n# blocks the process_vm_readv syscall.\n# See https://jvns.ca/blog/2020/04/29/why-strace-doesnt-work-in-docker/\n# and https://docs.docker.com/engine/security/seccomp/\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json & \\\n  probe1_pid=$!; \\\n  while [ ! 
-S /tmp/report/socket ]; do sleep 0.1; done; \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  1 /tmp/report/socket \\\n  > /tmp/report/probe2_report.json & \\\n  probe2_pid=$!; \\\n  wait $probe1_pid; \\\n  wait $probe2_pid'\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 0\n\n\necho \"Both endpoints in same container, seccomp-bpf disabled\"\n# This fixes the above problem, and makes it work.\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json & \\\n  probe1_pid=$!; \\\n  while [ ! 
-S /tmp/report/socket ]; do sleep 0.1; done; \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  1 /tmp/report/socket \\\n  > /tmp/report/probe2_report.json & \\\n  probe2_pid=$!; \\\n  wait $probe1_pid; \\\n  wait $probe2_pid'\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 1\n\n\necho \"Both endpoints in same container, capability SYS_PTRACE added\"\n# This should not really matter, but Docker adds a \"side effect\" to this which\n# also re-enables process_vm_readv in seccomp-bpf.\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --cap-add SYS_PTRACE \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json & \\\n  probe1_pid=$!; \\\n  while [ ! 
-S /tmp/report/socket ]; do sleep 0.1; done; \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  1 /tmp/report/socket \\\n  > /tmp/report/probe2_report.json & \\\n  probe2_pid=$!; \\\n  wait $probe1_pid; \\\n  wait $probe2_pid'\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 1\n\n\necho \"Both endpoints in same container, privileged\"\n# This should not really matter, but Docker adds a \"side effect\" to this which\n# also re-enables process_vm_readv in seccomp-bpf.\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --privileged \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json & \\\n  probe1_pid=$!; \\\n  while [ ! 
-S /tmp/report/socket ]; do sleep 0.1; done; \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  1 /tmp/report/socket \\\n  > /tmp/report/probe2_report.json & \\\n  probe2_pid=$!; \\\n  wait $probe1_pid; \\\n  wait $probe2_pid'\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 1\n\n\necho \"Both endpoints in same container, stronger YAMA limits\"\n# CMA is able to work under YAMA when the latter is set to levels 0 or 1, as\n# in the first case YAMA adds no extra limit and in the second case CMA will\n# configure YAMA so that it allows the process to be ptraced by any other one.\n# However CMA can't handle YAMA at level 2 or higher.\n# We keep disabling seccomp-bpf as otherwise this would be shadowed.\n\nsudo sh -c 'echo 2 > /proc/sys/kernel/yama/ptrace_scope'\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json & \\\n  probe1_pid=$!; \\\n  while [ ! 
-S /tmp/report/socket ]; do sleep 0.1; done; \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  1 /tmp/report/socket \\\n  > /tmp/report/probe2_report.json & \\\n  probe2_pid=$!; \\\n  wait $probe1_pid; \\\n  wait $probe2_pid'\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 0\n\nsudo sh -c 'echo 1 > /proc/sys/kernel/yama/ptrace_scope'\n\n\n# TODO\n# echo \"Both endpoints in same container, different users/groups\"\n\n\n# TODO\n# echo \"Both endpoints in same container, same users/groups but different effective user/group\"\n\n\necho \"Each endpoint in own container, with separate namespace\"\n# This isn't supposed to work, as each container gets its own user and PID\n# namespace, but CMA needs them to match. We disable seccomp-bpf to give this\n# test a fighting chance.\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json' &\nprobe1_pid=$!\nwhile [ ! 
-S \"$TEMPDIR/socket\" ]; do sleep 0.1; done\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  1 /tmp/report/socket \\\n  > /tmp/report/probe2_report.json' &\nprobe2_pid=$!\nwait $probe1_pid\nwait $probe2_pid\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 0\n\n\n# Docker allows a container to reuse another one's PID namespace, but doesn't\n# allow the same for user namespaces.\n\n\necho \"Each endpoint in own container, reusing host namespaces\"\n# This should fix the issues of the above.\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  --pid host \\\n  --userns host \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json' &\nprobe1_pid=$!\nwhile [ ! 
-S \"$TEMPDIR/socket\" ]; do sleep 0.1; done\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  --pid host \\\n  --userns host \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  1 /tmp/report/socket \\\n  > /tmp/report/probe2_report.json' &\nprobe2_pid=$!\nwait $probe1_pid\nwait $probe2_pid\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 1\n\n\necho \"Each endpoint in own container, privileged, sharing PID namespace\"\n# This should also help.\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --cidfile \"$TEMPDIR/probe1_container_id\" \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --privileged \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json' &\nprobe1_pid=$!\nwhile [ ! 
-S \"$TEMPDIR/socket\" ]; do sleep 0.1; done\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --pid \"container:$(cat \"$TEMPDIR/probe1_container_id\")\" \\\n  --privileged \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  1 /tmp/report/socket \\\n  > /tmp/report/probe2_report.json' &\nprobe2_pid=$!\nwait $probe1_pid\nwait $probe2_pid\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 1\n\n\necho \"One endpoint on host, other in container, with own namespace\"\n# This isn't supposed to work, as each container gets its own user and PID\n# namespace, but CMA needs them to match. We disable seccomp-bpf to give this\n# test a fighting chance. And also AppArmor, as it starts mattering here,\n# because Docker sets its own profile (docker-default) which is different than\n# the host's one (unconfined).\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  --security-opt apparmor=unconfined \\\n  --user \"$(id -u):$(id -g)\" \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json' &\nprobe1_pid=$!\nwhile [ ! 
-S \"$TEMPDIR/socket\" ]; do sleep 0.1; done\nsudo chmod ugo+rwx \"$TEMPDIR\"/socket\nTP_VERBOSE_LOGGING=5 \\\n  \"$(pwd)/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe\" \\\n  1 \"$TEMPDIR/socket\" \\\n  > \"$TEMPDIR/probe2_report.json\" &\nprobe2_pid=$!\nwait $probe1_pid\nwait $probe2_pid\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 0\n\n\necho \"One endpoint on host, other in container, reusing host namespace\"\n# This should fix the issues of the above.\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  --security-opt apparmor=unconfined \\\n  --pid host \\\n  --userns host \\\n  --user \"$(id -u):$(id -g)\" \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json' &\nprobe1_pid=$!\nwhile [ ! 
-S \"$TEMPDIR/socket\" ]; do sleep 0.1; done\nsudo chmod ugo+rwx \"$TEMPDIR\"/socket\nTP_VERBOSE_LOGGING=5 \\\n  \"$(pwd)/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe\" \\\n  1 \"$TEMPDIR/socket\" \\\n  > \"$TEMPDIR/probe2_report.json\" &\nprobe2_pid=$!\nwait $probe1_pid\nwait $probe2_pid\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 1\n\n\necho \"One endpoint on host, other in container, privileged\"\n# This should also help.\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\ndocker run \\\n  --volume \"$TEMPDIR:/tmp/report\" \\\n  --volume \"$(pwd)/build:/tmp/build\" \\\n  --security-opt seccomp=unconfined \\\n  --security-opt apparmor=unconfined \\\n  --pid host \\\n  --user \"$(id -u):$(id -g)\" \\\n  --privileged \\\n  cimg/base:2020.01 \\\n  sh -c ' \\\n  TP_VERBOSE_LOGGING=5 \\\n  /tmp/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe \\\n  0 /tmp/report/socket \\\n  > /tmp/report/probe1_report.json' &\nprobe1_pid=$!\nwhile [ ! -S \"$TEMPDIR/socket\" ]; do sleep 0.1; done\nsudo chmod ugo+rwx \"$TEMPDIR\"/socket\nTP_VERBOSE_LOGGING=5 \\\n  \"$(pwd)/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe\" \\\n  1 \"$TEMPDIR/socket\" \\\n  > \"$TEMPDIR/probe2_report.json\" &\nprobe2_pid=$!\nwait $probe1_pid\nwait $probe2_pid\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 1\n\n\necho \"Both endpoints on host\"\n# Should be a no-brainer?\n\nTEMPDIR=$(mktemp --directory)\nchmod ugo+rwx \"$TEMPDIR\"\necho \"Using $TEMPDIR for staging data\"\n\nTP_VERBOSE_LOGGING=5 \\\n  \"$(pwd)/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe\" \\\n  0 \"$TEMPDIR/socket\" \\\n  > \"$TEMPDIR/probe1_report.json\" &\nprobe1_pid=$!\nwhile [ ! 
-S \"$TEMPDIR/socket\" ]; do sleep 0.1; done\nTP_VERBOSE_LOGGING=5 \\\n  \"$(pwd)/build/tensorpipe/test/channel/cma/tensorpipe_channel_cma_probe\" \\\n  1 \"$TEMPDIR/socket\" \\\n  > \"$TEMPDIR/probe2_report.json\" &\nprobe2_pid=$!\nwait $probe1_pid\nwait $probe2_pid\n\npython3 \\\n  \"$(pwd)/tensorpipe/test/channel/cma/probe_report_checker.py\" \\\n  \"$TEMPDIR/probe1_report.json\" \"$TEMPDIR/probe2_report.json\" 1\n"
  },
  {
    "path": "tensorpipe/test/channel/cma/probe.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <cstring>\n\n#include <sys/prctl.h>\n#include <sys/socket.h>\n#include <sys/types.h>\n#include <sys/uio.h>\n#include <sys/un.h>\n#include <unistd.h>\n\n#include <tensorpipe/channel/cma/factory.h>\n#include <tensorpipe/common/defs.h>\n\nnamespace {}\n\nint main(int argc, char* argv[]) {\n  TP_THROW_ASSERT_IF(argc < 1);\n  if (argc != 3) {\n    TP_LOG_INFO() << \"Usage: \" << argv[0]\n                  << \" [rank] [path to a UNIX domain socket]\";\n    return 0;\n  }\n\n  TP_LOG_INFO() << \"My PID is \" << ::getpid();\n\n  int rank = std::strtol(argv[1], nullptr, 10);\n\n  int rv;\n  int fd = ::socket(AF_UNIX, SOCK_STREAM, 0);\n  TP_THROW_SYSTEM_IF(fd < 0, errno);\n\n  struct sockaddr_un socketAddr;\n  std::memset(&socketAddr, 0, sizeof(struct sockaddr_un));\n  socketAddr.sun_family = AF_UNIX;\n  std::strcpy(socketAddr.sun_path, argv[2]);\n\n  if (rank == 0) {\n    rv = ::bind(\n        fd,\n        reinterpret_cast<struct sockaddr*>(&socketAddr),\n        sizeof(struct sockaddr_un));\n    TP_THROW_SYSTEM_IF(rv < 0, errno);\n    rv = ::listen(fd, 0);\n    TP_THROW_SYSTEM_IF(rv < 0, errno);\n    struct sockaddr_storage peerAddr;\n    socklen_t peerAddrlen = sizeof(struct sockaddr_storage);\n    do {\n      rv = ::accept(\n          fd, reinterpret_cast<struct sockaddr*>(&peerAddr), &peerAddrlen);\n      TP_THROW_SYSTEM_IF(rv < 0 && errno != EINTR, errno);\n    } while (rv < 0);\n    int otherFd = rv;\n    rv = ::close(fd);\n    TP_THROW_SYSTEM_IF(rv < 0, errno);\n    rv = ::unlink(argv[2]);\n    TP_THROW_SYSTEM_IF(rv < 0, errno);\n    fd = otherFd;\n  } else {\n    do {\n      rv = ::connect(\n          fd,\n          reinterpret_cast<struct sockaddr*>(&socketAddr),\n          sizeof(struct sockaddr_un));\n      
TP_THROW_SYSTEM_IF(rv < 0 && errno != EINTR, errno);\n    } while (rv < 0);\n  }\n\n  struct ucred peerCreds;\n  std::memset(&peerCreds, 0, sizeof(struct ucred));\n  socklen_t peerCredsLen = sizeof(struct ucred);\n  rv = ::getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &peerCreds, &peerCredsLen);\n\n  pid_t peerPid = peerCreds.pid;\n\n  TP_LOG_INFO() << \"The peer's PID is \" << peerPid;\n\n  rv = ::prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n\n  uint64_t outbox = 0x0123456789abcdef;\n  void* outboxPtr = &outbox;\n  TP_LOG_INFO() << \"My outbox's address is 0x\" << std::hex\n                << reinterpret_cast<uintptr_t>(outboxPtr);\n  rv = ::write(fd, &outboxPtr, sizeof(void*));\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n  TP_THROW_ASSERT_IF(rv != sizeof(void*));\n  void* peerOutboxPtr;\n  rv = ::read(fd, &peerOutboxPtr, sizeof(void*));\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n  TP_THROW_ASSERT_IF(rv != sizeof(void*));\n  TP_LOG_INFO() << \"The peer's inbox address is 0x\" << std::hex\n                << reinterpret_cast<uintptr_t>(peerOutboxPtr);\n\n  uint64_t inbox;\n  struct iovec localIov;\n  std::memset(&localIov, 0, sizeof(struct iovec));\n  localIov.iov_base = &inbox;\n  localIov.iov_len = sizeof(uint64_t);\n  struct iovec remoteIov;\n  std::memset(&remoteIov, 0, sizeof(struct iovec));\n  remoteIov.iov_base = peerOutboxPtr;\n  remoteIov.iov_len = sizeof(uint64_t);\n\n  ssize_t result = ::process_vm_readv(peerPid, &localIov, 1, &remoteIov, 1, 0);\n  TP_LOG_INFO() << \"Calling process_vm_readv returned \" << result\n                << \", errno is set to \" << errno\n                << \" and my inbox now has value 0x\" << std::hex << inbox;\n  bool successful = false;\n  if (result >= 0) {\n    TP_THROW_ASSERT_IF(result != sizeof(uint64_t));\n    TP_THROW_ASSERT_IF(inbox != 0x0123456789abcdef);\n    successful = true;\n  }\n\n  uint8_t ack;\n  rv = ::write(fd, &ack, sizeof(uint8_t));\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n  
TP_THROW_ASSERT_IF(rv != sizeof(uint8_t));\n  rv = ::read(fd, &ack, sizeof(uint8_t));\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n  TP_THROW_ASSERT_IF(rv != sizeof(uint8_t));\n\n  rv = ::close(fd);\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n\n  auto ctx = tensorpipe::channel::cma::create();\n  TP_LOG_INFO() << \"The CMA context's viability is: \" << std::boolalpha\n                << ctx->isViable();\n  std::string descriptor;\n  if (ctx->isViable()) {\n    auto cpuDevice = tensorpipe::Device{tensorpipe::kCpuDeviceType, 0};\n    auto deviceDescriptors = ctx->deviceDescriptors();\n    auto iter = deviceDescriptors.find(cpuDevice);\n    TP_DCHECK(iter != deviceDescriptors.end());\n    descriptor = iter->second;\n  }\n  TP_LOG_INFO() << \"Its descriptor is: \" << descriptor;\n\n  std::cout << \"{\\\"syscall_success\\\": \" << successful\n            << \", \\\"viability\\\": \" << ctx->isViable()\n            << \", \\\"device_descriptor\\\": \\\"\" << descriptor << \"\\\"}\"\n            << std::endl;\n}\n"
  },
  {
    "path": "tensorpipe/test/channel/cma/probe_report_checker.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\nimport json\nimport sys\n\nif __name__ == \"__main__\":\n    if len(sys.argv) < 1:\n        raise RuntimeError()\n    if len(sys.argv) != 4:\n        print(\n            f\"Usage: {sys.argv[0]} [first report] [second report] [supposed to work]\",\n            file=sys.stderr,\n        )\n        sys.exit(0)\n\n    with open(sys.argv[1], \"rb\") as f:\n        first_report = json.load(f)\n    with open(sys.argv[2], \"rb\") as f:\n        second_report = json.load(f)\n    supposed_to_work = int(sys.argv[3])\n\n    worked_in_practice = (\n        first_report[\"syscall_success\"] == 1 and second_report[\"syscall_success\"] == 1\n    )\n    if worked_in_practice != supposed_to_work:\n        raise RuntimeError(\n            f\"The syscall didn't behave as the test expected it to. It \"\n            f\"{'succeeded' if worked_in_practice else 'failed'} whereas it was \"\n            f\"supposed to {'succeed' if supposed_to_work else 'fail'}.\"\n        )\n\n    detected_as_working = (\n        first_report[\"viability\"] == 1\n        and second_report[\"viability\"] == 1\n        and first_report[\"device_descriptor\"] == second_report[\"device_descriptor\"]\n    )\n    if detected_as_working != worked_in_practice:\n        print(\n            f\"The CMA autodetection didn't correctly predict the behavior of the \"\n            f\"syscall. It determined it would \"\n            f\"{'succeed' if detected_as_working else 'fail'} whereas it actually \"\n            f\"{'succeeded' if worked_in_practice else 'failed'}.\",\n            file=sys.stderr,\n        )\n        sys.exit(1)\n\n    sys.exit(0)\n"
  },
  {
    "path": "tensorpipe/test/channel/cuda_basic/cuda_basic_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <numeric>\n\n#include <tensorpipe/channel/basic/factory.h>\n#include <tensorpipe/channel/cuda_basic/factory.h>\n#include <tensorpipe/test/channel/channel_test_cuda.h>\n\nnamespace {\n\nclass CudaBasicChannelTestHelper : public CudaChannelTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) override {\n    auto cpuContext = tensorpipe::channel::basic::create();\n    auto context =\n        tensorpipe::channel::cuda_basic::create(std::move(cpuContext));\n    context->setId(std::move(id));\n    return context;\n  }\n\n public:\n  std::shared_ptr<PeerGroup> makePeerGroup() override {\n    return std::make_shared<ProcessPeerGroup>();\n  }\n};\n\nCudaBasicChannelTestHelper helper;\n\nclass CudaBasicChannelTestSuite : public ChannelTestSuite {};\n\n} // namespace\n\nclass CannotCommunicateCpuToCpuTest : public ChannelTestCase {\n public:\n  void run(ChannelTestHelper* /* unused */) override {\n    ForkedThreadPeerGroup pg;\n    pg.spawn(\n        [&]() {\n          auto cpuContext = tensorpipe::channel::basic::create();\n          auto ctx =\n              tensorpipe::channel::cuda_basic::create(std::move(cpuContext));\n          auto deviceDescriptors = ctx->deviceDescriptors();\n          auto it = deviceDescriptors.find(\n              tensorpipe::Device{tensorpipe::kCpuDeviceType, 0});\n          EXPECT_FALSE(it == deviceDescriptors.end());\n          auto descriptor = it->second;\n          EXPECT_FALSE(ctx->canCommunicateWithRemote(descriptor, descriptor));\n        },\n        [&]() {\n          // Do nothing.\n        });\n  }\n};\n\nCHANNEL_TEST(CudaBasicChannelTestSuite, CannotCommunicateCpuToCpu);\n\nINSTANTIATE_TEST_CASE_P(\n    CudaBasic,\n    
ChannelTestSuite,\n    ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaBasic,\n    CudaChannelTestSuite,\n    ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaBasic,\n    CudaMultiGPUChannelTestSuite,\n    ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaBasic,\n    CudaXDTTChannelTestSuite,\n    ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaBasic,\n    CudaBasicChannelTestSuite,\n    ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/channel/cuda_gdr/cuda_gdr_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <numeric>\n\n#include <tensorpipe/channel/cuda_gdr/factory.h>\n#include <tensorpipe/test/channel/channel_test_cuda.h>\n\nnamespace {\n\nclass CudaGdrChannelTestHelper : public CudaChannelTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) override {\n    auto context = tensorpipe::channel::cuda_gdr::create();\n    context->setId(std::move(id));\n    return context;\n  }\n\n public:\n  std::shared_ptr<PeerGroup> makePeerGroup() override {\n    return std::make_shared<ProcessPeerGroup>();\n  }\n};\n\nCudaGdrChannelTestHelper helper;\n\n} // namespace\n\nINSTANTIATE_TEST_CASE_P(CudaGdr, ChannelTestSuite, ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaGdr,\n    CudaChannelTestSuite,\n    ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaGdr,\n    CudaMultiGPUChannelTestSuite,\n    ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/channel/cuda_helpers.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <algorithm>\n#include <cstdlib>\n#include <string>\n#include <tuple>\n#include <vector>\n\n#include <gtest/gtest.h>\n\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_lib.h>\n#include <tensorpipe/common/nvml_lib.h>\n\nnamespace tensorpipe {\n\ninline bool isContextOpenOnDevice(const NvmlLib& nvmlLib, nvmlDevice_t device) {\n  unsigned int count = 0;\n  std::vector<nvmlProcessInfo_t> processInfos;\n  while (true) {\n    nvmlReturn_t res = nvmlLib.deviceGetComputeRunningProcesses(\n        device, &count, processInfos.data());\n    processInfos.resize(count);\n    if (res == NVML_SUCCESS) {\n      break;\n    }\n    if (res == NVML_ERROR_INSUFFICIENT_SIZE) {\n      continue;\n    }\n    TP_NVML_CHECK(nvmlLib, res);\n  }\n\n  pid_t myPid = ::getpid();\n  for (const nvmlProcessInfo_t& processInfo : processInfos) {\n    if (processInfo.pid == myPid) {\n      return true;\n    }\n  }\n  return false;\n}\n\ninline ::testing::AssertionResult initializedCudaContexts(\n    const std::vector<int>& expectedDeviceIndices) {\n  // This check won't work when the test is running in a PID namespace, as NVML\n  // will return the PIDs in the root namespace but it doesn't seem possible for\n  // us to map them back to our namespace. 
Hence we use an env var to allow to\n  // disable this check in such environments.\n  char* shouldSkip = std::getenv(\"TP_SKIP_CHECK_OPEN_CUDA_CTXS\");\n  if (shouldSkip != nullptr) {\n    return ::testing::AssertionSuccess();\n  }\n\n  Error error;\n  CudaLib cudaLib;\n  std::tie(error, cudaLib) = CudaLib::create();\n  TP_THROW_ASSERT_IF(error) << error.what();\n  NvmlLib nvmlLib;\n  std::tie(error, nvmlLib) = NvmlLib::create();\n  TP_THROW_ASSERT_IF(error) << error.what();\n\n  std::vector<std::string> uuids = getUuidsOfVisibleDevices(cudaLib);\n  for (int deviceIdx = 0; deviceIdx < uuids.size(); deviceIdx++) {\n    // NVML uses a different format for UUIDs.\n    std::string nvmlUuid = \"GPU-\" + uuids[deviceIdx];\n    nvmlDevice_t nvmlDevice;\n    TP_NVML_CHECK(\n        nvmlLib, nvmlLib.deviceGetHandleByUUID(nvmlUuid.c_str(), &nvmlDevice));\n    bool actualHasCtx = isContextOpenOnDevice(nvmlLib, nvmlDevice);\n\n    bool expectedHasCtx = std::find(\n                              expectedDeviceIndices.begin(),\n                              expectedDeviceIndices.end(),\n                              deviceIdx) != expectedDeviceIndices.end();\n\n    if (actualHasCtx && !expectedHasCtx) {\n      return ::testing::AssertionFailure()\n          << \"a CUDA context was initialized on device #\" << deviceIdx\n          << \" but that shouldn't have happened\";\n    }\n    if (!actualHasCtx && expectedHasCtx) {\n      return ::testing::AssertionFailure()\n          << \"a CUDA context should have been initialized on device #\"\n          << deviceIdx << \" but that didn't happen\";\n    }\n  }\n  return ::testing::AssertionSuccess();\n}\n\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/test/channel/cuda_ipc/cuda_ipc_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <numeric>\n\n#include <tensorpipe/channel/cuda_ipc/factory.h>\n#include <tensorpipe/test/channel/channel_test_cuda.h>\n\nnamespace {\n\nclass CudaIpcChannelTestHelper : public CudaChannelTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) override {\n    auto context = tensorpipe::channel::cuda_ipc::create();\n    context->setId(std::move(id));\n    return context;\n  }\n\n public:\n  std::shared_ptr<PeerGroup> makePeerGroup() override {\n    return std::make_shared<ProcessPeerGroup>();\n  }\n};\n\nCudaIpcChannelTestHelper helper;\n\nclass CudaIpcChannelTestSuite : public ChannelTestSuite {};\n\n} // namespace\n\nclass CannotCommunicateInSameProcessTest : public ChannelTestCase {\n public:\n  void run(ChannelTestHelper* /* unused */) override {\n    ForkedThreadPeerGroup pg;\n    pg.spawn(\n        [&]() {\n          auto ctx = tensorpipe::channel::cuda_ipc::create();\n          auto deviceDescriptors = ctx->deviceDescriptors();\n          EXPECT_GT(deviceDescriptors.size(), 0);\n          auto descriptor = deviceDescriptors.begin()->second;\n          // From within a given process, the device descriptors will be the\n          // same.\n          EXPECT_FALSE(ctx->canCommunicateWithRemote(descriptor, descriptor));\n        },\n        [&]() {\n          // Do nothing.\n        });\n  }\n};\n\nCHANNEL_TEST(CudaIpcChannelTestSuite, CannotCommunicateInSameProcess);\n\nINSTANTIATE_TEST_CASE_P(CudaIpc, ChannelTestSuite, ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaIpc,\n    CudaChannelTestSuite,\n    ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaIpc,\n    CudaMultiGPUChannelTestSuite,\n    
::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaIpc,\n    CudaIpcChannelTestSuite,\n    ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/channel/cuda_xth/cuda_xth_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <numeric>\n\n#include <tensorpipe/channel/cuda_xth/factory.h>\n#include <tensorpipe/test/channel/channel_test_cuda.h>\n\nnamespace {\n\nclass CudaXthChannelTestHelper : public CudaChannelTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) override {\n    auto context = tensorpipe::channel::cuda_xth::create();\n    context->setId(std::move(id));\n    return context;\n  }\n\n public:\n  std::shared_ptr<PeerGroup> makePeerGroup() override {\n    return std::make_shared<ForkedThreadPeerGroup>();\n  }\n};\n\nCudaXthChannelTestHelper helper;\n\n} // namespace\n\nINSTANTIATE_TEST_CASE_P(CudaXth, ChannelTestSuite, ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaXth,\n    CudaChannelTestSuite,\n    ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(\n    CudaXth,\n    CudaMultiGPUChannelTestSuite,\n    ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/channel/kernel.cu",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <cuda.h>\n\n__global__ void _slowKernel(char* ptr, int sz) {\n  int idx = blockIdx.x * blockDim.x + threadIdx.x;\n  for (; idx < sz; idx += (gridDim.x * blockDim.x)) {\n    for (int i = 0; i < 100000; ++i) {\n      ptr[idx] += ptr[(idx + 1007) % sz] + i;\n    }\n  }\n}\n\nvoid slowKernel(void* ptr, int kSize, cudaStream_t stream) {\n  _slowKernel<<<128, 128, 0, stream>>>((char*)ptr, kSize);\n}\n"
  },
  {
    "path": "tensorpipe/test/channel/kernel.cuh",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cuda_runtime.h>\n\n// This kernel takes time and puts garbage data in the buffer. It is used to\n// test proper synchronization in CUDA channels.\nvoid slowKernel(void* ptr, int kSize, cudaStream_t stream);\n"
  },
  {
    "path": "tensorpipe/test/channel/mpt/mpt_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/context.h>\n#include <tensorpipe/channel/mpt/factory.h>\n#include <tensorpipe/common/cpu_buffer.h>\n#include <tensorpipe/test/channel/channel_test_cpu.h>\n#include <tensorpipe/transport/connection.h>\n\nnamespace {\n\nclass MptChannelTestHelper : public CpuChannelTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) override {\n    std::vector<std::shared_ptr<tensorpipe::transport::Context>> contexts = {\n        tensorpipe::transport::uv::create(),\n        tensorpipe::transport::uv::create(),\n        tensorpipe::transport::uv::create()};\n    std::vector<std::shared_ptr<tensorpipe::transport::Listener>> listeners = {\n        contexts[0]->listen(\"127.0.0.1\"),\n        contexts[1]->listen(\"127.0.0.1\"),\n        contexts[2]->listen(\"127.0.0.1\")};\n    auto context = tensorpipe::channel::mpt::create(\n        std::move(contexts), std::move(listeners));\n    context->setId(std::move(id));\n    return context;\n  }\n};\n\nMptChannelTestHelper helper;\n\nclass MptChannelTestSuite : public ChannelTestSuite {};\n\n} // namespace\n\nclass ContextIsNotJoinedTest : public ChannelTestCase {\n  // Because it's static we must define it out-of-line (until C++-17, where we\n  // can mark this inline).\n  static const std::string kReady;\n\n public:\n  void run(ChannelTestHelper* helper) override {\n    auto addr = \"127.0.0.1\";\n\n    helper_ = helper;\n    peers_ = helper_->makePeerGroup();\n    peers_->spawn(\n        [&] {\n          auto context = tensorpipe::transport::uv::create();\n          context->setId(\"server_harness\");\n\n          auto listener = context->listen(addr);\n\n          
std::promise<std::shared_ptr<tensorpipe::transport::Connection>>\n              connectionProm;\n          listener->accept(\n              [&](const tensorpipe::Error& error,\n                  std::shared_ptr<tensorpipe::transport::Connection>\n                      connection) {\n                ASSERT_FALSE(error) << error.what();\n                connectionProm.set_value(std::move(connection));\n              });\n\n          peers_->send(PeerGroup::kClient, listener->addr());\n          server(connectionProm.get_future().get());\n\n          context->join();\n        },\n        [&] {\n          auto context = tensorpipe::transport::uv::create();\n          context->setId(\"client_harness\");\n\n          auto laddr = peers_->recv(PeerGroup::kClient);\n          client(context->connect(laddr));\n\n          context->join();\n        });\n  }\n\n  void server(std::shared_ptr<tensorpipe::transport::Connection> conn) {\n    std::shared_ptr<tensorpipe::channel::Context> context =\n        this->helper_->makeContext(\"server\");\n    this->peers_->send(PeerGroup::kClient, kReady);\n    context->createChannel(\n        {std::move(conn)}, tensorpipe::channel::Endpoint::kListen);\n  }\n\n  void client(std::shared_ptr<tensorpipe::transport::Connection> conn) {\n    std::shared_ptr<tensorpipe::channel::Context> context =\n        this->helper_->makeContext(\"client\");\n    EXPECT_EQ(kReady, this->peers_->recv(PeerGroup::kClient));\n    context->createChannel(\n        {std::move(conn)}, tensorpipe::channel::Endpoint::kConnect);\n  }\n\n protected:\n  ChannelTestHelper* helper_;\n  std::shared_ptr<PeerGroup> peers_;\n};\n\nconst std::string ContextIsNotJoinedTest::kReady = \"ready\";\n\nCHANNEL_TEST(MptChannelTestSuite, ContextIsNotJoined);\n\nINSTANTIATE_TEST_CASE_P(Mpt, ChannelTestSuite, ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(Mpt, CpuChannelTestSuite, ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(Mpt, MptChannelTestSuite, 
::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/channel/xth/xth_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/channel/xth/factory.h>\n#include <tensorpipe/test/channel/channel_test_cpu.h>\n\nnamespace {\n\nclass XthChannelTestHelper : public CpuChannelTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::channel::Context> makeContextInternal(\n      std::string id) override {\n    auto context = tensorpipe::channel::xth::create();\n    context->setId(std::move(id));\n    return context;\n  }\n};\n\nXthChannelTestHelper helper;\n\n} // namespace\n\nINSTANTIATE_TEST_CASE_P(Xth, ChannelTestSuite, ::testing::Values(&helper));\n\nINSTANTIATE_TEST_CASE_P(Xth, CpuChannelTestSuite, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/common/cuda_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <cstring>\n\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/cuda_lib.h>\n#include <tensorpipe/test/peer_group.h>\n#include <tensorpipe/test/test_environment.h>\n\n#include <gtest/gtest.h>\n\nnamespace {\n\ntensorpipe::CudaLib getCudaLib() {\n  tensorpipe::Error error;\n  tensorpipe::CudaLib cudaLib;\n  std::tie(error, cudaLib) = tensorpipe::CudaLib::create();\n  EXPECT_FALSE(error) << error.what();\n  return cudaLib;\n}\n\n} // namespace\n\n// This tests whether we can retrieve the index of the device on which a pointer\n// resides under \"normal\" circumstances (in the same context where it was\n// allocated, or in a \"fresh\" thread).\nTEST(Cuda, DeviceForPointer) {\n  if (TestEnvironment::numCudaDevices() < 2) {\n    GTEST_SKIP() << \"Skipping test requiring >=2 CUDA devices.\";\n  }\n\n  ForkedThreadPeerGroup pg;\n  pg.spawn(\n      [&]() {\n        TP_CUDA_CHECK(cudaSetDevice(1));\n        void* ptr;\n        TP_CUDA_CHECK(cudaMalloc(&ptr, 1024));\n\n        EXPECT_EQ(tensorpipe::cudaDeviceForPointer(getCudaLib(), ptr), 1);\n\n        std::string ptrStr(\n            reinterpret_cast<char*>(&ptr),\n            reinterpret_cast<char*>(&ptr) + sizeof(void*));\n        pg.send(PeerGroup::kClient, ptrStr);\n      },\n      [&]() {\n        std::string ptrStr = pg.recv(PeerGroup::kClient);\n        void* ptr = *reinterpret_cast<void**>(&ptrStr[0]);\n\n        EXPECT_EQ(tensorpipe::cudaDeviceForPointer(getCudaLib(), ptr), 1);\n      });\n}\n\n// This tests whether we can retrieve the index of the device on which a pointer\n// resided after we've explicitly set the current device to an invalid value.\n// This is known to cause problems in recent versions of CUDA, possibly because\n// of a 
bug.\nTEST(Cuda, DeviceForPointerAfterReset) {\n  if (TestEnvironment::numCudaDevices() < 2) {\n    GTEST_SKIP() << \"Skipping test requiring >=2 CUDA devices.\";\n  }\n\n  ForkedThreadPeerGroup pg;\n  pg.spawn(\n      [&]() {\n        TP_CUDA_CHECK(cudaSetDevice(1));\n        void* ptr;\n        TP_CUDA_CHECK(cudaMalloc(&ptr, 1024));\n\n        TP_CUDA_CHECK(cudaSetDevice(0));\n\n        EXPECT_EQ(tensorpipe::cudaDeviceForPointer(getCudaLib(), ptr), 1);\n\n        std::string ptrStr(\n            reinterpret_cast<char*>(&ptr),\n            reinterpret_cast<char*>(&ptr) + sizeof(void*));\n        pg.send(PeerGroup::kClient, ptrStr);\n      },\n      [&]() {\n        std::string ptrStr = pg.recv(PeerGroup::kClient);\n        void* ptr = *reinterpret_cast<void**>(&ptrStr[0]);\n\n        TP_CUDA_CHECK(cudaSetDevice(0));\n\n        EXPECT_EQ(tensorpipe::cudaDeviceForPointer(getCudaLib(), ptr), 1);\n      });\n}\n"
  },
  {
    "path": "tensorpipe/test/common/defs_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/defs.h>\n\n#include <gtest/gtest.h>\n\nTEST(Defs, Exception) {\n  EXPECT_THROW(TP_THROW_EINVAL(), std::invalid_argument);\n  EXPECT_THROW(TP_THROW_EINVAL() << \"hola\", std::invalid_argument);\n  EXPECT_THROW(TP_THROW_EINVAL() << \"adioshola\", std::invalid_argument);\n  EXPECT_THROW(TP_THROW_SYSTEM(ENODATA) << \"adioshola\", std::system_error);\n  EXPECT_THROW(TP_THROW_SYSTEM(EBUSY), std::system_error);\n  EXPECT_THROW(TP_THROW_SYSTEM(EBUSY) << \"my message\", std::system_error);\n}\n"
  },
  {
    "path": "tensorpipe/test/common/epoll_loop_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <sys/eventfd.h>\n\n#include <deque>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/epoll_loop.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\n\nnamespace {\n\nclass Handler : public EpollLoop::EventHandler {\n public:\n  void handleEventsFromLoop(int events) override {\n    std::unique_lock<std::mutex> lock(m_);\n    events_.push_back(events);\n    cv_.notify_all();\n  }\n\n  int nextEvents() {\n    std::unique_lock<std::mutex> lock(m_);\n    cv_.wait(lock, [&]() { return !events_.empty(); });\n    int events = events_.front();\n    events_.pop_front();\n    return events;\n  }\n\n private:\n  std::mutex m_;\n  std::condition_variable cv_;\n  std::deque<int> events_;\n};\n\n// Monitor an fd for events and execute function when triggered.\n//\n// The lifetime of an instance dictates when the specified function\n// may be called. 
The function is guaranteed to not be called after\n// the monitor has been destructed.\n//\nclass FunctionEventHandler\n    : public EpollLoop::EventHandler,\n      public std::enable_shared_from_this<FunctionEventHandler> {\n public:\n  using TFunction = std::function<void(FunctionEventHandler&)>;\n\n  FunctionEventHandler(\n      DeferredExecutor& deferredExecutor,\n      EpollLoop& loop,\n      int fd,\n      int event,\n      TFunction fn);\n\n  ~FunctionEventHandler() override;\n\n  void start();\n\n  void cancel();\n\n  void handleEventsFromLoop(int events) override;\n\n private:\n  DeferredExecutor& deferredExecutor_;\n  EpollLoop& loop_;\n  const int fd_;\n  const int event_;\n  TFunction fn_;\n\n  std::mutex mutex_;\n  bool cancelled_{false};\n};\n\nFunctionEventHandler::FunctionEventHandler(\n    DeferredExecutor& deferredExecutor,\n    EpollLoop& loop,\n    int fd,\n    int event,\n    TFunction fn)\n    : deferredExecutor_(deferredExecutor),\n      loop_(loop),\n      fd_(fd),\n      event_(event),\n      fn_(std::move(fn)) {}\n\nFunctionEventHandler::~FunctionEventHandler() {\n  cancel();\n}\n\nvoid FunctionEventHandler::start() {\n  deferredExecutor_.runInLoop(\n      [&]() { loop_.registerDescriptor(fd_, event_, shared_from_this()); });\n}\n\nvoid FunctionEventHandler::cancel() {\n  std::unique_lock<std::mutex> lock(mutex_);\n  if (!cancelled_) {\n    deferredExecutor_.runInLoop([&]() {\n      loop_.unregisterDescriptor(fd_);\n      cancelled_ = true;\n    });\n  }\n}\n\nvoid FunctionEventHandler::handleEventsFromLoop(int events) {\n  if (events & event_) {\n    fn_(*this);\n  }\n}\n\n// Instantiates an event monitor for the specified fd.\ntemplate <typename T>\nstd::shared_ptr<FunctionEventHandler> createMonitor(\n    DeferredExecutor& reactor,\n    EpollLoop& loop,\n    std::shared_ptr<T> shared,\n    int fd,\n    int event,\n    std::function<void(T&, FunctionEventHandler&)> fn) {\n  auto handler = std::make_shared<FunctionEventHandler>(\n      
reactor,\n      loop,\n      fd,\n      event,\n      [weak{std::weak_ptr<T>{shared}},\n       fn{std::move(fn)}](FunctionEventHandler& handler) {\n        auto shared = weak.lock();\n        if (shared) {\n          fn(*shared, handler);\n        }\n      });\n  handler->start();\n  return handler;\n}\n\n} // namespace\n\nTEST(ShmLoop, RegisterUnregister) {\n  OnDemandDeferredExecutor deferredExecutor;\n  EpollLoop loop{deferredExecutor};\n  auto handler = std::make_shared<Handler>();\n  auto efd = Fd(eventfd(0, EFD_NONBLOCK));\n\n  {\n    // Test if writable (always).\n    deferredExecutor.runInLoop([&]() {\n      loop.registerDescriptor(efd.fd(), EPOLLOUT | EPOLLONESHOT, handler);\n    });\n    ASSERT_EQ(handler->nextEvents(), EPOLLOUT);\n    efd.writeOrThrow<uint64_t>(1337);\n\n    // Test if readable (only if previously written to).\n    deferredExecutor.runInLoop([&]() {\n      loop.registerDescriptor(efd.fd(), EPOLLIN | EPOLLONESHOT, handler);\n    });\n    ASSERT_EQ(handler->nextEvents(), EPOLLIN);\n    ASSERT_EQ(efd.readOrThrow<uint64_t>(), 1337);\n\n    // Test if we can unregister the descriptor.\n    deferredExecutor.runInLoop([&]() { loop.unregisterDescriptor(efd.fd()); });\n  }\n\n  loop.join();\n}\n\nTEST(ShmLoop, Monitor) {\n  OnDemandDeferredExecutor deferredExecutor;\n  EpollLoop loop{deferredExecutor};\n  auto efd = Fd(eventfd(0, EFD_NONBLOCK));\n  constexpr uint64_t kValue = 1337;\n\n  {\n    std::mutex mutex;\n    std::condition_variable cv;\n    bool done = false;\n\n    // Test if writable (always).\n    auto shared = std::make_shared<int>(1338);\n    auto monitor = createMonitor<int>(\n        deferredExecutor,\n        loop,\n        shared,\n        efd.fd(),\n        EPOLLOUT,\n        [&](int& i, FunctionEventHandler& handler) {\n          EXPECT_EQ(i, 1338);\n          efd.writeOrThrow<uint64_t>(kValue);\n          handler.cancel();\n          {\n            std::unique_lock<std::mutex> lock(mutex);\n            done = true;\n           
 cv.notify_all();\n          }\n        });\n\n    // Wait for monitor to trigger and perform a write.\n    std::unique_lock<std::mutex> lock(mutex);\n    cv.wait(lock, [&]() { return done; });\n  }\n\n  {\n    std::mutex mutex;\n    std::condition_variable cv;\n    bool done = false;\n    uint64_t value = 0;\n\n    // Test if readable (only if previously written to).\n    auto shared = std::make_shared<int>(1338);\n    auto monitor = createMonitor<int>(\n        deferredExecutor,\n        loop,\n        shared,\n        efd.fd(),\n        EPOLLIN,\n        [&](int& i, FunctionEventHandler& handler) {\n          EXPECT_EQ(i, 1338);\n          value = efd.readOrThrow<uint64_t>();\n          handler.cancel();\n          {\n            std::unique_lock<std::mutex> lock(mutex);\n            done = true;\n            cv.notify_all();\n          }\n        });\n\n    // Wait for monitor to trigger and perform a read.\n    std::unique_lock<std::mutex> lock(mutex);\n    cv.wait(lock, [&]() { return done; });\n\n    // Verify we read the correct value.\n    ASSERT_EQ(value, kValue);\n  }\n\n  loop.join();\n}\n\nTEST(ShmLoop, Defer) {\n  OnDemandDeferredExecutor deferredExecutor;\n  auto promise = std::make_shared<std::promise<void>>();\n  auto future = promise->get_future();\n  deferredExecutor.deferToLoop([promise]() { promise->set_value(); });\n  future.wait();\n  ASSERT_TRUE(future.valid());\n}\n"
  },
  {
    "path": "tensorpipe/test/common/ringbuffer_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <array>\n\n#include <tensorpipe/common/ringbuffer.h>\n#include <tensorpipe/common/ringbuffer_role.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\n\nstruct TestData {\n  uint16_t a;\n  uint16_t b;\n  uint16_t c;\n\n  bool operator==(const TestData& other) const {\n    return a == other.a && b == other.b && c == other.c;\n  }\n};\n\nconstexpr static int kNumRingbufferRoles = 2;\nconstexpr static int kConsumerRoleIdx = 0;\nconstexpr static int kProducerRoleIdx = 1;\nusing Consumer = RingBufferRole<kNumRingbufferRoles, kConsumerRoleIdx>;\nusing Producer = RingBufferRole<kNumRingbufferRoles, kProducerRoleIdx>;\n\n// Holds and owns the memory for the ringbuffer's header and data.\nclass RingBufferStorage {\n public:\n  explicit RingBufferStorage(size_t size) : header_(size) {}\n\n  RingBuffer<kNumRingbufferRoles> getRb() {\n    return {&header_, data_.get()};\n  }\n\n private:\n  RingBufferHeader<kNumRingbufferRoles> header_;\n  std::unique_ptr<uint8_t[]> data_ =\n      std::make_unique<uint8_t[]>(header_.kDataPoolByteSize);\n};\n\nsize_t usedSize(RingBuffer<kNumRingbufferRoles>& rb) {\n  return rb.getHeader().template readMarker<kProducerRoleIdx>() -\n      rb.getHeader().template readMarker<kConsumerRoleIdx>();\n}\n\nTEST(RingBuffer, WriteCopy) {\n  EXPECT_EQ(sizeof(TestData), 6);\n\n  // 16 bytes buffer. 
Fits two full TestData (each 6).\n  size_t size = 1u << 4;\n\n  RingBufferStorage storage(size);\n  RingBuffer<kNumRingbufferRoles> rb = storage.getRb();\n  // Make a producer.\n  Producer p{rb};\n  // Make a consumer.\n  Consumer c{rb};\n\n  EXPECT_EQ(usedSize(rb), 0);\n\n  TestData d0{.a = 0xBA98, .b = 0x7654, .c = 0xA312};\n  TestData d1{.a = 0xA987, .b = 0x7777, .c = 0x2812};\n  TestData d2{.a = 0xFFFF, .b = 0x3333, .c = 0x1212};\n\n  {\n    ssize_t ret = p.write(&d0, sizeof(d0));\n    EXPECT_EQ(ret, sizeof(TestData));\n  }\n  EXPECT_EQ(usedSize(rb), 6);\n\n  {\n    ssize_t ret = p.write(&d1, sizeof(d1));\n    EXPECT_EQ(ret, sizeof(TestData));\n  }\n  EXPECT_EQ(usedSize(rb), 12);\n\n  {\n    ssize_t ret = p.write(&d2, sizeof(d2));\n    EXPECT_EQ(ret, -ENODATA) << \"Needs 2 more bytes to write the 6 required, \"\n                                \"because 12 out of 16 are used.\";\n  }\n\n  TestData r;\n\n  {\n    ssize_t ret = c.read(&r, sizeof(r));\n    EXPECT_EQ(ret, sizeof(r));\n    EXPECT_EQ(r, d0);\n  }\n\n  {\n    ssize_t ret = c.read(&r, sizeof(r));\n    EXPECT_EQ(ret, sizeof(r));\n    EXPECT_EQ(r, d1);\n  }\n  // It should be empty by now.\n  EXPECT_EQ(usedSize(rb), 0);\n\n  {\n    ssize_t ret = p.write(&d2, sizeof(d2));\n    EXPECT_EQ(ret, sizeof(TestData));\n  }\n  {\n    ssize_t ret = c.read(&r, sizeof(r));\n    EXPECT_EQ(ret, sizeof(r));\n    EXPECT_EQ(r, d2);\n  }\n  // It should be empty by now.\n  EXPECT_EQ(usedSize(rb), 0);\n}\n\nTEST(RingBuffer, ReadMultipleElems) {\n  // 256 bytes buffer.\n  size_t size = 1u << 8u;\n\n  RingBufferStorage storage(size);\n  RingBuffer<kNumRingbufferRoles> rb = storage.getRb();\n  // Make a producer.\n  Producer p{rb};\n  // Make a consumer.\n  Consumer c{rb};\n\n  EXPECT_EQ(usedSize(rb), 0);\n\n  uint16_t n = 0xACAC; // fits 128 times\n\n  {\n    for (int i = 0; i < 128; ++i) {\n      ssize_t ret = p.write(&n, sizeof(n));\n      EXPECT_EQ(ret, sizeof(n));\n    }\n\n    // It must be full by now.\n    
EXPECT_EQ(usedSize(rb), 256);\n\n    ssize_t ret = p.write(&n, sizeof(n));\n    EXPECT_EQ(ret, -ENODATA);\n  }\n\n  {\n    uint8_t b = 0xEE;\n\n    ssize_t ret = p.write(&b, sizeof(b));\n    EXPECT_EQ(ret, -ENODATA) << \"Needs an extra byte\";\n  }\n\n  {\n    // read the three bytes at once.\n    ssize_t ret;\n    ret = c.startTx();\n    EXPECT_EQ(ret, 0);\n\n    std::array<uint8_t, 3> r;\n    ret = c.readInTx</*AllowPartial=*/false>(r.data(), sizeof(r));\n    EXPECT_EQ(ret, 3);\n    EXPECT_EQ(r[0], 0xAC);\n    EXPECT_EQ(r[1], 0xAC);\n    EXPECT_EQ(r[2], 0xAC);\n    ret = c.commitTx();\n    EXPECT_EQ(ret, 0);\n  }\n\n  {\n    // read 253 bytes at once.\n    ssize_t ret;\n    ret = c.startTx();\n    EXPECT_EQ(ret, 0);\n\n    std::array<uint8_t, 253> r;\n    ret = c.readInTx</*AllowPartial=*/false>(r.data(), sizeof(r));\n    EXPECT_EQ(ret, 253);\n    for (int i = 0; i < 253; ++i) {\n      EXPECT_EQ(r[i], 0xAC);\n    }\n    ret = c.commitTx();\n    EXPECT_EQ(ret, 0);\n  }\n\n  {\n    // No more elements\n    ssize_t ret;\n    ret = c.startTx();\n    EXPECT_EQ(ret, 0);\n    uint8_t ch;\n    ret = c.readInTx</*AllowPartial=*/false>(&ch, sizeof(ch));\n    EXPECT_EQ(ret, -ENODATA);\n    ret = c.cancelTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_TRUE(!c.inTx()) << \"Canceled transaction should've been canceled\";\n  }\n}\n\nTEST(RingBuffer, CopyWrapping) {\n  // 8 bytes buffer.\n  size_t size = 1u << 3;\n\n  RingBufferStorage storage(size);\n  RingBuffer<kNumRingbufferRoles> rb = storage.getRb();\n  // Make a producer.\n  Producer p{rb};\n  // Make a consumer.\n  Consumer c{rb};\n\n  EXPECT_EQ(usedSize(rb), 0);\n\n  uint8_t ch = 0xA7;\n  uint64_t n = 0xFFFFFFFFFFFFFFFF;\n\n  // Put one byte.\n  EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 0);\n  EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 0);\n  EXPECT_EQ(usedSize(rb), 0);\n  ssize_t ret = p.write(&ch, sizeof(ch));\n  EXPECT_EQ(ret, sizeof(ch));\n  EXPECT_EQ(rb.getHeader().template 
readMarker<kProducerRoleIdx>(), 1);\n  EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 0);\n  EXPECT_EQ(usedSize(rb), 1);\n\n  // Next 8 bytes won't fit.\n  ret = p.write(&n, sizeof(n));\n  EXPECT_EQ(ret, -ENODATA)\n      << \"Needs an extra byte to write the 8 bytes element. \"\n         \"Capacity 8, used 1.\";\n\n  // Remove the one byte in, now head is one off.\n  uint8_t cr;\n  uint64_t nr;\n\n  ret = c.read(&cr, sizeof(cr));\n  EXPECT_EQ(ret, sizeof(cr));\n  EXPECT_EQ(cr, ch);\n  EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 1);\n  EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 1);\n\n  // Next 8 bytes will fit, but wrap.\n  ret = p.write(&n, sizeof(n));\n  EXPECT_EQ(ret, sizeof(n));\n  EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 9);\n  EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 1);\n\n  ret = c.read(&nr, sizeof(nr));\n  EXPECT_EQ(ret, sizeof(nr));\n  EXPECT_EQ(nr, n);\n  EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 9);\n  EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 9);\n}\n\nTEST(RingBuffer, ReadTxWrappingOneCons) {\n  // 8 bytes buffer.\n  size_t size = 1u << 3;\n\n  RingBufferStorage storage(size);\n  RingBuffer<kNumRingbufferRoles> rb = storage.getRb();\n  // Make a producer.\n  Producer p{rb};\n  // Make a consumer.\n  Consumer c1{rb};\n\n  EXPECT_EQ(usedSize(rb), 0);\n\n  uint8_t ch = 0xA7;\n  uint64_t n = 0xFFFFFFFFFFFFFFFF;\n\n  // Put one byte.\n  {\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 0);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 0);\n    EXPECT_EQ(usedSize(rb), 0);\n    ssize_t ret = p.write(&ch, sizeof(ch));\n    EXPECT_EQ(ret, sizeof(ch));\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 1);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 0);\n    EXPECT_EQ(usedSize(rb), 1);\n  }\n\n  // Next 8 
bytes won't fit.\n  {\n    ssize_t ret = p.write(&n, sizeof(n));\n    EXPECT_EQ(ret, -ENODATA)\n        << \"Needs an extra byte to write the 8 bytes element. \"\n           \"Capacity 8, used 1.\";\n  }\n\n  // Remove the one byte in, now head is one off.\n  EXPECT_FALSE(c1.inTx());\n\n  {\n    // Start c1 read Tx\n    ssize_t ret;\n    ret = c1.startTx();\n    EXPECT_EQ(ret, 0);\n\n    uint8_t rch;\n    ret = c1.readInTx</*AllowPartial=*/false>(&rch, sizeof(rch));\n    EXPECT_EQ(ret, sizeof(uint8_t));\n    EXPECT_EQ(rch, ch);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 1);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 0);\n    EXPECT_TRUE(c1.inTx());\n  }\n\n  {\n    // Complete c1's Tx.\n    ssize_t ret = c1.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 1);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 1);\n  }\n  {\n    // Retrying to commit should fail.\n    ssize_t ret = c1.commitTx();\n    EXPECT_EQ(ret, -EINVAL);\n  }\n\n  {\n    // Next 8 bytes will fit, but wrap.\n    ssize_t ret = p.write(&n, sizeof(n));\n    EXPECT_EQ(ret, sizeof(n));\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 9);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 1);\n  }\n\n  {\n    // Start c1 read Tx again.\n    ssize_t ret;\n    ret = c1.startTx();\n    EXPECT_EQ(ret, 0);\n\n    uint64_t rn;\n    ret = c1.readInTx</*AllowPartial=*/false>(&rn, sizeof(rn));\n    EXPECT_EQ(ret, sizeof(uint64_t));\n    EXPECT_EQ(rn, n);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 9);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 1);\n    EXPECT_TRUE(c1.inTx());\n  }\n\n  {\n    // Complete c1.\n    ssize_t ret = c1.commitTx();\n    EXPECT_EQ(ret, 0);\n    ret = c1.commitTx();\n    EXPECT_EQ(ret, -EINVAL);\n  }\n\n  {\n    // Next 8 bytes will fit, but wrap.\n    
ssize_t ret = p.write(&n, sizeof(n));\n    EXPECT_EQ(ret, sizeof(n));\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 17);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 9);\n  }\n  {\n    ssize_t ret;\n    ret = c1.startTx();\n    EXPECT_EQ(ret, 0);\n\n    uint64_t rn;\n    ret = c1.readInTx</*AllowPartial=*/false>(&rn, sizeof(rn));\n    EXPECT_EQ(ret, sizeof(uint64_t));\n    EXPECT_EQ(rn, n);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 17);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 9);\n  }\n\n  {\n    // Cancel tx, data should be readable again.\n    ssize_t ret = c1.cancelTx();\n    EXPECT_EQ(ret, 0);\n  }\n\n  {\n    // Now c1 can read.\n    ssize_t ret;\n    ret = c1.startTx();\n    EXPECT_EQ(ret, 0);\n\n    uint64_t rn;\n    ret = c1.readInTx</*AllowPartial=*/false>(&rn, sizeof(rn));\n    EXPECT_EQ(ret, sizeof(uint64_t));\n    EXPECT_EQ(rn, n);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 17);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 9);\n  }\n\n  {\n    // Commit succeeds.\n    ssize_t ret = c1.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_FALSE(c1.inTx());\n  }\n}\n\nTEST(RingBuffer, ReadTxWrapping) {\n  // 8 bytes buffer.\n  size_t size = 1u << 3;\n\n  RingBufferStorage storage(size);\n  RingBuffer<kNumRingbufferRoles> rb = storage.getRb();\n  // Make a producer.\n  Producer p{rb};\n  // Make consumers.\n  Consumer c1{rb};\n  Consumer c2{rb};\n\n  EXPECT_EQ(usedSize(rb), 0);\n\n  uint8_t ch = 0xA7;\n  uint64_t n = 0x3333333333333333;\n\n  // Put one byte.\n  {\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 0);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 0);\n    EXPECT_EQ(usedSize(rb), 0);\n    ssize_t ret = p.write(&ch, sizeof(ch));\n    EXPECT_EQ(ret, sizeof(ch));\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 1);\n    
EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 0);\n    EXPECT_EQ(usedSize(rb), 1);\n  }\n\n  // Next 8 bytes won't fit.\n  {\n    ssize_t ret = p.write(&n, sizeof(n));\n    EXPECT_EQ(ret, -ENODATA)\n        << \"Needs an extra byte to write the 8 bytes element. \"\n           \"Capacity 8, used 1.\";\n  }\n\n  // Remove the one byte in, now head is one off.\n  EXPECT_FALSE(c1.inTx());\n  EXPECT_FALSE(c2.inTx());\n\n  {\n    // Start c1 read Tx\n    ssize_t ret;\n    ret = c1.startTx();\n    EXPECT_EQ(ret, 0);\n\n    uint8_t rch;\n    ret = c1.readInTx</*AllowPartial=*/false>(&rch, sizeof(rch));\n    EXPECT_EQ(ret, sizeof(uint8_t));\n    EXPECT_EQ(rch, ch);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 1);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 0);\n    EXPECT_TRUE(c1.inTx());\n  }\n\n  {\n    // Complete c1's Tx.\n    ssize_t ret = c1.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 1);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 1);\n  }\n  {\n    // Retrying to commit should fail.\n    ssize_t ret = c1.commitTx();\n    EXPECT_EQ(ret, -EINVAL);\n  }\n\n  {\n    // Next 8 bytes will fit, but wrap.\n    ssize_t ret = p.write(&n, sizeof(n));\n    EXPECT_EQ(ret, sizeof(n));\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 9);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 1);\n  }\n\n  {\n    // Start c1 read Tx again.\n    ssize_t ret;\n    ret = c1.startTx();\n    EXPECT_EQ(ret, 0);\n\n    uint64_t rn;\n    ret = c1.readInTx</*AllowPartial=*/false>(&rn, sizeof(rn));\n    EXPECT_EQ(ret, sizeof(uint64_t));\n    EXPECT_EQ(rn, n);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 9);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 1);\n    EXPECT_TRUE(c1.inTx());\n  }\n\n  {\n    // Try to start read tx before c1 
completing and get -EAGAIN.\n    ssize_t ret;\n    ret = c2.startTx();\n    EXPECT_EQ(ret, -EAGAIN);\n  }\n\n  {\n    // Complete c1.\n    ssize_t ret = c1.commitTx();\n    EXPECT_EQ(ret, 0);\n    ret = c1.commitTx();\n    EXPECT_EQ(ret, -EINVAL);\n  }\n\n  {\n    // Next 8 bytes will fit, but wrap.\n    ssize_t ret = p.write(&n, sizeof(n));\n    EXPECT_EQ(ret, sizeof(n));\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 17);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 9);\n  }\n  {\n    ssize_t ret;\n    ret = c2.startTx();\n    EXPECT_EQ(ret, 0);\n\n    uint64_t rn;\n    ret = c2.readInTx</*AllowPartial=*/false>(&rn, sizeof(rn));\n    EXPECT_EQ(ret, sizeof(uint64_t));\n    EXPECT_EQ(rn, n);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 17);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 9);\n  }\n\n  {\n    // Cancel tx, data should be readable again.\n    ssize_t ret = c2.cancelTx();\n    EXPECT_EQ(ret, 0);\n  }\n\n  {\n    // Now c1 can read.\n    ssize_t ret;\n    ret = c1.startTx();\n    EXPECT_EQ(ret, 0);\n\n    uint64_t rn;\n    ret = c1.readInTx</*AllowPartial=*/false>(&rn, sizeof(rn));\n    EXPECT_EQ(ret, sizeof(uint64_t));\n    EXPECT_EQ(rn, n);\n    EXPECT_EQ(rb.getHeader().template readMarker<kProducerRoleIdx>(), 17);\n    EXPECT_EQ(rb.getHeader().template readMarker<kConsumerRoleIdx>(), 9);\n  }\n\n  {\n    // Commit succeeds.\n    ssize_t ret = c1.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_FALSE(c1.inTx());\n    EXPECT_FALSE(c2.inTx());\n  }\n}\n\nTEST(RingBuffer, accessContiguousInTx) {\n  // 256 bytes buffer.\n  size_t size = 1u << 8u;\n\n  RingBufferStorage storage(size);\n  RingBuffer<kNumRingbufferRoles> rb = storage.getRb();\n  // Make a producer.\n  Producer p{rb};\n  // Make a consumer.\n  Consumer c{rb};\n\n  EXPECT_EQ(usedSize(rb), 0);\n\n  // Use different values for the three writing passes to tell them apart.\n  uint16_t value1 = 0xACAC; 
// fits 128 times\n  uint16_t value2 = 0xDCDC; // fits 128 times\n  uint16_t value3 = 0xEFEF; // fits 128 times\n\n  {\n    for (int i = 0; i < 128; ++i) {\n      ssize_t ret = p.write(&value1, sizeof(value1));\n      EXPECT_EQ(ret, sizeof(value1));\n    }\n\n    // It must be full by now.\n    EXPECT_EQ(usedSize(rb), 256);\n\n    uint8_t b = 0xEE;\n    ssize_t ret = p.write(&b, sizeof(b));\n    EXPECT_EQ(ret, -ENODATA);\n  }\n\n  {\n    // Read a 128-byte buffer that is left-aligned with the start.\n    ssize_t ret;\n    ret = c.startTx();\n    EXPECT_EQ(ret, 0);\n\n    std::array<Consumer::Buffer, 2> buffers;\n    std::tie(ret, buffers) = c.accessContiguousInTx</*AllowPartial=*/true>(128);\n    EXPECT_EQ(ret, 1);\n    EXPECT_EQ(buffers[0].len, 128);\n    for (int i = 0; i < 128; ++i) {\n      EXPECT_EQ(buffers[0].ptr[i], 0xAC);\n    }\n    ret = c.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_EQ(usedSize(rb), 128);\n  }\n\n  {\n    for (int i = 0; i < 64; ++i) {\n      ssize_t ret = p.write(&value2, sizeof(value2));\n      EXPECT_EQ(ret, sizeof(value2));\n    }\n\n    // It must be full again by now.\n    EXPECT_EQ(usedSize(rb), 256);\n  }\n\n  {\n    // Read a 256-byte buffer that wraps around halfway through.\n    ssize_t ret;\n    ret = c.startTx();\n    EXPECT_EQ(ret, 0);\n\n    std::array<Consumer::Buffer, 2> buffers;\n    std::tie(ret, buffers) = c.accessContiguousInTx</*AllowPartial=*/true>(256);\n    EXPECT_EQ(ret, 2);\n    EXPECT_EQ(buffers[0].len, 128);\n    for (int i = 0; i < 128; ++i) {\n      EXPECT_EQ(buffers[0].ptr[i], 0xAC);\n    }\n    EXPECT_EQ(buffers[1].len, 128);\n    for (int i = 0; i < 128; ++i) {\n      EXPECT_EQ(buffers[1].ptr[i], 0xDC);\n    }\n    ret = c.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_EQ(usedSize(rb), 0);\n  }\n\n  {\n    for (int i = 0; i < 64; ++i) {\n      ssize_t ret = p.write(&value2, sizeof(value2));\n      EXPECT_EQ(ret, sizeof(value2));\n    }\n    for (int i = 0; i < 64; ++i) {\n      ssize_t ret = 
p.write(&value3, sizeof(value3));\n      EXPECT_EQ(ret, sizeof(value3));\n    }\n\n    // It must be full again by now.\n    EXPECT_EQ(usedSize(rb), 256);\n  }\n\n  {\n    // Read a 128-byte buffer that is right-aligned with the end.\n    ssize_t ret;\n    ret = c.startTx();\n    EXPECT_EQ(ret, 0);\n\n    std::array<Consumer::Buffer, 2> buffers;\n    std::tie(ret, buffers) = c.accessContiguousInTx</*AllowPartial=*/true>(128);\n    EXPECT_EQ(ret, 1);\n    EXPECT_EQ(buffers[0].len, 128);\n    for (int i = 0; i < 128; ++i) {\n      EXPECT_EQ(buffers[0].ptr[i], 0xDC);\n    }\n    ret = c.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_EQ(usedSize(rb), 128);\n  }\n\n  {\n    for (int i = 0; i < 64; ++i) {\n      ssize_t ret = p.write(&value3, sizeof(value3));\n      EXPECT_EQ(ret, sizeof(value3));\n    }\n\n    // It must be full again by now.\n    EXPECT_EQ(usedSize(rb), 256);\n  }\n\n  {\n    // Reading the whole 256 bytes.\n    ssize_t ret;\n    ret = c.startTx();\n    EXPECT_EQ(ret, 0);\n\n    std::array<Consumer::Buffer, 2> buffers;\n    std::tie(ret, buffers) = c.accessContiguousInTx</*AllowPartial=*/true>(256);\n    EXPECT_EQ(ret, 1);\n    EXPECT_EQ(buffers[0].len, 256);\n    for (int i = 0; i < 256; ++i) {\n      EXPECT_EQ(buffers[0].ptr[i], 0xEF);\n    }\n    ret = c.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_EQ(usedSize(rb), 0);\n  }\n\n  {\n    // Attempt reading from empty buffer.\n    ssize_t ret;\n    ret = c.startTx();\n    EXPECT_EQ(ret, 0);\n\n    std::array<Consumer::Buffer, 2> buffers;\n    std::tie(ret, buffers) = c.accessContiguousInTx</*AllowPartial=*/true>(200);\n    EXPECT_EQ(ret, 0);\n    ret = c.commitTx();\n    EXPECT_EQ(ret, 0);\n    EXPECT_EQ(usedSize(rb), 0);\n  }\n}\n"
  },
  {
    "path": "tensorpipe/test/common/shm_ringbuffer_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/ringbuffer.h>\n#include <tensorpipe/common/ringbuffer_role.h>\n#include <tensorpipe/common/shm_ringbuffer.h>\n#include <tensorpipe/common/shm_segment.h>\n#include <tensorpipe/common/socket.h>\n\n#include <sys/eventfd.h>\n#include <sys/socket.h>\n#include <sys/types.h>\n\n#include <thread>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\n\nconstexpr static int kNumRingbufferRoles = 2;\nusing Consumer = RingBufferRole<kNumRingbufferRoles, 0>;\nusing Producer = RingBufferRole<kNumRingbufferRoles, 1>;\n\n// Same process produces and consumes share memory through different mappings.\nTEST(ShmRingBuffer, SameProducerConsumer) {\n  Fd headerFd;\n  Fd dataFd;\n  {\n    // Producer part.\n    // Buffer large enough to fit all data and persistent\n    // (needs to be unlinked up manually).\n    Error error;\n    ShmSegment headerSegment;\n    ShmSegment dataSegment;\n    RingBuffer<kNumRingbufferRoles> rb;\n    std::tie(error, headerSegment, dataSegment, rb) =\n        createShmRingBuffer<kNumRingbufferRoles>(256 * 1024);\n    Producer prod{rb};\n\n    // Producer loop. 
It all fits in buffer.\n    int i = 0;\n    while (i < 2000) {\n      ssize_t ret = prod.write(&i, sizeof(i));\n      EXPECT_EQ(ret, sizeof(i));\n      ++i;\n    }\n\n    // Duplicate the file descriptors so that the shared memory remains alive\n    // when the original fds are closed by the segments' destructors.\n    headerFd = Fd(::dup(headerSegment.getFd()));\n    dataFd = Fd(::dup(dataSegment.getFd()));\n  }\n\n  {\n    // Consumer part.\n    // Map file again (to a different address) and consume it.\n    Error error;\n    ShmSegment headerSegment;\n    ShmSegment dataSegment;\n    RingBuffer<kNumRingbufferRoles> rb;\n    std::tie(error, headerSegment, dataSegment, rb) =\n        loadShmRingBuffer<kNumRingbufferRoles>(\n            std::move(headerFd), std::move(dataFd));\n    Consumer cons{rb};\n\n    int i = 0;\n    while (i < 2000) {\n      int value;\n      ssize_t ret = cons.read(&value, sizeof(value));\n      EXPECT_EQ(ret, sizeof(value));\n      EXPECT_EQ(value, i);\n      ++i;\n    }\n  }\n};\n\nTEST(ShmRingBuffer, SingleProducer_SingleConsumer) {\n  int sockFds[2];\n  {\n    int rv = socketpair(AF_UNIX, SOCK_STREAM, 0, sockFds);\n    if (rv != 0) {\n      TP_THROW_SYSTEM(errno) << \"Failed to create socket pair\";\n    }\n  }\n\n  int eventFd = eventfd(0, 0);\n  if (eventFd < 0) {\n    TP_THROW_SYSTEM(errno) << \"Failed to create event fd\";\n  }\n\n  int pid = fork();\n  if (pid < 0) {\n    TP_THROW_SYSTEM(errno) << \"Failed to fork\";\n  }\n\n  if (pid == 0) {\n    // child, the producer\n    // Make a scope so segments are destroyed even on exit(0).\n    {\n      Error error;\n      ShmSegment headerSegment;\n      ShmSegment dataSegment;\n      RingBuffer<kNumRingbufferRoles> rb;\n      std::tie(error, headerSegment, dataSegment, rb) =\n          createShmRingBuffer<kNumRingbufferRoles>(1024);\n      Producer prod{rb};\n\n      {\n        auto err = sendFdsToSocket(\n            sockFds[0], headerSegment.getFd(), dataSegment.getFd());\n        if 
(err) {\n          TP_THROW_ASSERT() << err.what();\n        }\n      }\n\n      int i = 0;\n      while (i < 2000) {\n        ssize_t ret = prod.write(&i, sizeof(i));\n        if (ret == -ENODATA) {\n          std::this_thread::yield();\n          continue;\n        }\n        EXPECT_EQ(ret, sizeof(i));\n        ++i;\n      }\n      // Because of buffer size smaller than amount of data written,\n      // producer cannot have completed the loop before consumer\n      // started consuming the data.\n\n      {\n        uint64_t c;\n        ::read(eventFd, &c, sizeof(uint64_t));\n      }\n    }\n    // Child exits. Careful when calling exit() directly, because\n    // it does not call destructors. We ensured shared_ptrs were\n    // destroyed before by calling exit(0).\n    exit(0);\n  }\n  // parent, the consumer\n\n  // Wait for other process to create buffer.\n  Fd headerFd;\n  Fd dataFd;\n  {\n    auto err = recvFdsFromSocket(sockFds[1], headerFd, dataFd);\n    if (err) {\n      TP_THROW_ASSERT() << err.what();\n    }\n  }\n  Error error;\n  ShmSegment headerSegment;\n  ShmSegment dataSegment;\n  RingBuffer<kNumRingbufferRoles> rb;\n  std::tie(error, headerSegment, dataSegment, rb) =\n      loadShmRingBuffer<kNumRingbufferRoles>(\n          std::move(headerFd), std::move(dataFd));\n  Consumer cons{rb};\n\n  int i = 0;\n  while (i < 2000) {\n    int value;\n    ssize_t ret = cons.read(&value, sizeof(value));\n    if (ret == -ENODATA) {\n      std::this_thread::yield();\n      continue;\n    }\n    EXPECT_EQ(ret, sizeof(value));\n    EXPECT_EQ(value, i);\n    ++i;\n  }\n  {\n    uint64_t c = 1;\n    ::write(eventFd, &c, sizeof(uint64_t));\n  }\n  ::close(eventFd);\n  ::close(sockFds[0]);\n  ::close(sockFds[1]);\n  // Wait for child to make gtest happy.\n  ::wait(nullptr);\n};\n"
  },
  {
    "path": "tensorpipe/test/common/shm_segment_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/shm_segment.h>\n#include <tensorpipe/common/socket.h>\n\n#include <sys/eventfd.h>\n#include <sys/socket.h>\n\n#include <thread>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\n\n// Same process produces and consumes share memory through different mappings.\nTEST(ShmSegment, SameProducerConsumer_Scalar) {\n  // Set affinity of producer to CPU zero so that consumer only has to read from\n  // that one CPU's buffer.\n  cpu_set_t cpuset;\n  CPU_ZERO(&cpuset);\n  CPU_SET(0, &cpuset);\n  sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);\n\n  // This must stay alive for the file descriptor to remain open.\n  Fd fd;\n  {\n    // Producer part.\n    Error error;\n    ShmSegment segment;\n    int* myIntPtr;\n    std::tie(error, segment, myIntPtr) = ShmSegment::create<int>();\n    ASSERT_FALSE(error) << error.what();\n    int& myInt = *myIntPtr;\n    myInt = 1000;\n\n    // Duplicate the file descriptor so that the shared memory remains alive\n    // when the original fd is closed by the segment's destructor.\n    fd = Fd(::dup(segment.getFd()));\n  }\n\n  {\n    // Consumer part.\n    // Map file again (to a different address) and consume it.\n    Error error;\n    ShmSegment segment;\n    int* myIntPtr;\n    std::tie(error, segment, myIntPtr) = ShmSegment::load<int>(std::move(fd));\n    ASSERT_FALSE(error) << error.what();\n    EXPECT_EQ(segment.getSize(), sizeof(int));\n    EXPECT_EQ(*myIntPtr, 1000);\n  }\n};\n\nTEST(ShmSegment, SingleProducer_SingleConsumer_Array) {\n  size_t numFloats = 330000;\n\n  int sockFds[2];\n  {\n    int rv = socketpair(AF_UNIX, SOCK_STREAM, 0, sockFds);\n    if (rv != 0) {\n      TP_THROW_SYSTEM(errno) << \"Failed to create socket pair\";\n    }\n  }\n\n  int 
eventFd = eventfd(0, 0);\n  if (eventFd < 0) {\n    TP_THROW_SYSTEM(errno) << \"Failed to create event fd\";\n  }\n\n  int pid = fork();\n  if (pid < 0) {\n    TP_THROW_SYSTEM(errno) << \"Failed to fork\";\n  }\n\n  if (pid == 0) {\n    // child, the producer\n    // Make a scope so shared_ptr's are released even on exit(0).\n    {\n      // use huge pages in creation and not in loading. This should only affects\n      // TLB overhead.\n      Error error;\n      ShmSegment segment;\n      float* myFloats;\n      std::tie(error, segment, myFloats) =\n          ShmSegment::create<float[]>(numFloats);\n      ASSERT_FALSE(error) << error.what();\n\n      for (int i = 0; i < numFloats; ++i) {\n        myFloats[i] = i;\n      }\n\n      {\n        auto err = sendFdsToSocket(sockFds[0], segment.getFd());\n        if (err) {\n          TP_THROW_ASSERT() << err.what();\n        }\n      }\n      {\n        uint64_t c;\n        ::read(eventFd, &c, sizeof(uint64_t));\n      }\n    }\n    // Child exits. Careful when calling exit() directly, because\n    // it does not call destructors. We ensured shared_ptrs were\n    // destroyed before by calling exit(0).\n    exit(0);\n  }\n\n  // parent, the consumer\n  Fd segmentFd;\n  {\n    auto err = recvFdsFromSocket(sockFds[1], segmentFd);\n    if (err) {\n      TP_THROW_ASSERT() << err.what();\n    }\n  }\n  Error error;\n  ShmSegment segment;\n  float* myFloats;\n  std::tie(error, segment, myFloats) =\n      ShmSegment::load<float[]>(std::move(segmentFd));\n  ASSERT_FALSE(error) << error.what();\n  EXPECT_EQ(numFloats * sizeof(float), segment.getSize());\n  for (int i = 0; i < numFloats; ++i) {\n    EXPECT_EQ(myFloats[i], i);\n  }\n  {\n    uint64_t c = 1;\n    ::write(eventFd, &c, sizeof(uint64_t));\n  }\n  ::close(eventFd);\n  ::close(sockFds[0]);\n  ::close(sockFds[1]);\n  // Wait for child to make gtest happy.\n  ::wait(nullptr);\n};\n"
  },
  {
    "path": "tensorpipe/test/common/system_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/common/system.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\n\nTEST(Pow2, isPow2) {\n  for (uint64_t i = 0; i < 63; ++i) {\n    EXPECT_TRUE(isPow2(1ull << i));\n  }\n\n  EXPECT_FALSE(isPow2(3));\n  EXPECT_FALSE(isPow2(5));\n  EXPECT_FALSE(isPow2(10));\n  EXPECT_FALSE(isPow2(15));\n  EXPECT_TRUE(isPow2(16));\n  EXPECT_FALSE(isPow2(17));\n  EXPECT_FALSE(isPow2(18));\n  EXPECT_FALSE(isPow2(25));\n  EXPECT_FALSE(isPow2(1028));\n}\n\nTEST(Pow2, nextPow2) {\n  for (uint64_t i = 0; i < 63; ++i) {\n    uint64_t p2 = 1ull << i;\n    uint64_t nextP2 = 1ull << (i + 1);\n    EXPECT_EQ(nextPow2(p2), p2);\n    EXPECT_EQ(nextPow2(p2 + 1), nextP2);\n  }\n}\n"
  },
  {
    "path": "tensorpipe/test/core/context_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <cstring>\n#include <exception>\n#include <future>\n#include <memory>\n#include <string>\n\n#include <gtest/gtest.h>\n\n#include <tensorpipe/tensorpipe.h>\n#include <tensorpipe/test/peer_group.h>\n\n#if TP_USE_CUDA\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/tensorpipe_cuda.h>\n#endif // TP_USE_CUDA\n\nusing namespace tensorpipe;\n\nnamespace {\n\n::testing::AssertionResult buffersAreEqual(\n    const void* ptr1,\n    const size_t len1,\n    const void* ptr2,\n    const size_t len2) {\n  if (ptr1 == nullptr && ptr2 == nullptr) {\n    if (len1 == 0 && len2 == 0) {\n      return ::testing::AssertionSuccess();\n    }\n    if (len1 != 0) {\n      return ::testing::AssertionFailure()\n          << \"first pointer is null but length isn't 0\";\n    }\n    if (len1 != 0) {\n      return ::testing::AssertionFailure()\n          << \"second pointer is null but length isn't 0\";\n    }\n  }\n  if (ptr1 == nullptr) {\n    return ::testing::AssertionFailure()\n        << \"first pointer is null but second one isn't\";\n  }\n  if (ptr2 == nullptr) {\n    return ::testing::AssertionFailure()\n        << \"second pointer is null but first one isn't\";\n  }\n  if (len1 != len2) {\n    return ::testing::AssertionFailure()\n        << \"first length is \" << len1 << \" but second one is \" << len2;\n  }\n  if (std::memcmp(ptr1, ptr2, len1) != 0) {\n    return ::testing::AssertionFailure() << \"buffer contents aren't equal\";\n  }\n  return ::testing::AssertionSuccess();\n}\n\n#if TP_USE_CUDA\nstd::vector<uint8_t> unwrapCudaBuffer(CudaBuffer b, size_t length) {\n  std::vector<uint8_t> result(length);\n  TP_CUDA_CHECK(cudaStreamSynchronize(b.stream));\n  TP_CUDA_CHECK(cudaMemcpy(result.data(), b.ptr, length, 
cudaMemcpyDefault));\n\n  return result;\n}\n#endif // TP_USE_CUDA\n\n::testing::AssertionResult descriptorAndAllocationMatchMessage(\n    const Descriptor& descriptor,\n    const Allocation& allocation,\n    const Message& message) {\n  EXPECT_EQ(descriptor.payloads.size(), allocation.payloads.size());\n  if (descriptor.payloads.size() != message.payloads.size()) {\n    return ::testing::AssertionFailure()\n        << \"descriptor has \" << descriptor.payloads.size()\n        << \" payloads but message has \" << message.payloads.size();\n  }\n  for (size_t idx = 0; idx < descriptor.payloads.size(); idx++) {\n    EXPECT_TRUE(buffersAreEqual(\n        allocation.payloads[idx].data,\n        descriptor.payloads[idx].length,\n        message.payloads[idx].data,\n        message.payloads[idx].length));\n  }\n  EXPECT_EQ(descriptor.tensors.size(), allocation.tensors.size());\n  if (descriptor.tensors.size() != message.tensors.size()) {\n    return ::testing::AssertionFailure()\n        << \"descriptor has \" << descriptor.tensors.size()\n        << \" tensors but message has \" << message.tensors.size();\n  }\n  for (size_t idx = 0; idx < descriptor.tensors.size(); idx++) {\n    EXPECT_EQ(\n        allocation.tensors[idx].buffer.device(),\n        message.tensors[idx].buffer.device());\n    const std::string& deviceType =\n        allocation.tensors[idx].buffer.device().type;\n\n    if (deviceType == kCpuDeviceType) {\n      EXPECT_TRUE(buffersAreEqual(\n          allocation.tensors[idx].buffer.unwrap<CpuBuffer>().ptr,\n          descriptor.tensors[idx].length,\n          message.tensors[idx].buffer.unwrap<CpuBuffer>().ptr,\n          message.tensors[idx].length));\n#if TP_USE_CUDA\n    } else if (deviceType == kCudaDeviceType) {\n      std::vector<uint8_t> buffer1 = unwrapCudaBuffer(\n          allocation.tensors[idx].buffer.unwrap<CudaBuffer>(),\n          descriptor.tensors[idx].length);\n      std::vector<uint8_t> buffer2 = unwrapCudaBuffer(\n          
message.tensors[idx].buffer.unwrap<CudaBuffer>(),\n          message.tensors[idx].length);\n      EXPECT_TRUE(buffersAreEqual(\n          buffer1.data(), buffer1.size(), buffer2.data(), buffer2.size()));\n#endif // TP_USE_CUDA\n    } else {\n      ADD_FAILURE() << \"Unexpected device type: \" << deviceType;\n    }\n  }\n  return ::testing::AssertionSuccess();\n}\n\n#if TP_USE_CUDA\nstruct CudaPointerDeleter {\n  void operator()(void* ptr) {\n    TP_CUDA_CHECK(cudaFree(ptr));\n  }\n};\n\nstd::unique_ptr<void, CudaPointerDeleter> makeCudaPointer(size_t length) {\n  void* cudaPtr;\n  TP_CUDA_CHECK(cudaMalloc(&cudaPtr, length));\n  return std::unique_ptr<void, CudaPointerDeleter>(cudaPtr);\n}\n#endif // TP_USE_CUDA\n\n// Having 4 payloads per message is arbitrary.\nconstexpr int kNumPayloads = 4;\n// Having 4 tensors per message ensures there are 2 CPU tensors and 2 CUDA\n// tensors.\nconstexpr int kNumTensors = 4;\nstd::string kPayloadData = \"I'm a payload\";\nstd::string kTensorData = \"And I'm a tensor\";\n#if TP_USE_CUDA\nconst int kCudaTensorLength = 32;\nconst uint8_t kCudaTensorFillValue = 0x42;\n#endif // TP_USE_CUDA\n\nMessage::Tensor makeTensor(int index) {\n#if TP_USE_CUDA\n  static std::unique_ptr<void, CudaPointerDeleter> kCudaTensorData = []() {\n    auto cudaPtr = makeCudaPointer(kCudaTensorLength);\n    TP_CUDA_CHECK(\n        cudaMemset(cudaPtr.get(), kCudaTensorFillValue, kCudaTensorLength));\n    return cudaPtr;\n  }();\n\n  if (index % 2 == 1) {\n    return {\n        .buffer =\n            CudaBuffer{\n                .ptr = kCudaTensorData.get(),\n                .stream = cudaStreamDefault,\n            },\n        // FIXME: Use non-blocking stream.\n        .length = kCudaTensorLength,\n    };\n  }\n#endif // TP_USE_CUDA\n\n  return {\n      .buffer =\n          CpuBuffer{\n              .ptr = reinterpret_cast<void*>(\n                  const_cast<char*>(kTensorData.data())),\n          },\n      .length = kTensorData.length(),\n  
};\n}\n\nMessage makeMessage(int numPayloads, int numTensors) {\n  Message message;\n  for (int i = 0; i < numPayloads; i++) {\n    Message::Payload payload;\n    payload.data =\n        reinterpret_cast<void*>(const_cast<char*>(kPayloadData.data()));\n    payload.length = kPayloadData.length();\n    message.payloads.push_back(std::move(payload));\n  }\n  for (int i = 0; i < numTensors; i++) {\n    message.tensors.push_back(makeTensor(i));\n  }\n  return message;\n}\n\nAllocation allocateForDescriptor(\n    const Descriptor& descriptor,\n    std::vector<std::shared_ptr<void>>& buffers) {\n  Allocation allocation;\n  for (const auto& payload : descriptor.payloads) {\n    // FIXME: Changing this to a make_shared causes havoc.\n    auto payloadData = std::unique_ptr<uint8_t, std::default_delete<uint8_t[]>>(\n        new uint8_t[payload.length]);\n    allocation.payloads.push_back({.data = payloadData.get()});\n    buffers.push_back(std::move(payloadData));\n  }\n  for (const auto& tensor : descriptor.tensors) {\n    if (tensor.sourceDevice.type == kCpuDeviceType) {\n      auto tensorData =\n          std::unique_ptr<uint8_t, std::default_delete<uint8_t[]>>(\n              new uint8_t[tensor.length]);\n      allocation.tensors.push_back({\n          .buffer = CpuBuffer{.ptr = tensorData.get()},\n      });\n      buffers.push_back(std::move(tensorData));\n#if TP_USE_CUDA\n    } else if (tensor.sourceDevice.type == kCudaDeviceType) {\n      auto tensorData = makeCudaPointer(tensor.length);\n      allocation.tensors.push_back({\n          .buffer =\n              CudaBuffer{\n                  .ptr = tensorData.get(),\n                  // FIXME: Use non-blocking streams.\n                  .stream = cudaStreamDefault,\n              },\n      });\n      buffers.push_back(std::move(tensorData));\n#endif // TP_USE_CUDA\n    } else {\n      ADD_FAILURE() << \"Unrecognized device type: \" << tensor.sourceDevice.type;\n    }\n  }\n\n  return allocation;\n}\n\nMessage 
messageFromAllocation(\n    const Descriptor& descriptor,\n    const Allocation& allocation) {\n  Message message;\n  message.metadata = descriptor.metadata;\n  for (int payloadIdx = 0; payloadIdx < descriptor.payloads.size();\n       ++payloadIdx) {\n    message.payloads.emplace_back();\n    Message::Payload& payload = message.payloads.back();\n    payload.metadata = descriptor.payloads[payloadIdx].metadata;\n    payload.length = descriptor.payloads[payloadIdx].length;\n    payload.data = allocation.payloads[payloadIdx].data;\n  }\n  for (int tensorIdx = 0; tensorIdx < descriptor.tensors.size(); ++tensorIdx) {\n    message.tensors.emplace_back();\n    Message::Tensor& tensor = message.tensors.back();\n    tensor.metadata = descriptor.tensors[tensorIdx].metadata;\n    tensor.length = descriptor.tensors[tensorIdx].length;\n    tensor.buffer = allocation.tensors[tensorIdx].buffer;\n  }\n\n  return message;\n}\n\nstd::vector<std::string> genUrls() {\n  std::vector<std::string> res;\n\n#if TENSORPIPE_HAS_SHM_TRANSPORT\n  res.push_back(\"shm://\");\n#endif // TENSORPIPE_HAS_SHM_TRANSPORT\n  res.push_back(\"uv://127.0.0.1\");\n\n  return res;\n}\n\nstd::shared_ptr<Context> makeContext() {\n  auto context = std::make_shared<Context>();\n\n  context->registerTransport(0, \"uv\", transport::uv::create());\n#if TENSORPIPE_HAS_SHM_TRANSPORT\n  context->registerTransport(1, \"shm\", transport::shm::create());\n#endif // TENSORPIPE_HAS_SHM_TRANSPORT\n  context->registerChannel(0, \"basic\", channel::basic::create());\n#if TENSORPIPE_HAS_CMA_CHANNEL\n  context->registerChannel(1, \"cma\", channel::cma::create());\n#endif // TENSORPIPE_HAS_CMA_CHANNEL\n#if TP_USE_CUDA\n  context->registerChannel(\n      10, \"cuda_basic\", channel::cuda_basic::create(channel::basic::create()));\n#if TENSORPIPE_HAS_CUDA_IPC_CHANNEL\n  context->registerChannel(11, \"cuda_ipc\", channel::cuda_ipc::create());\n#endif // TENSORPIPE_HAS_CUDA_IPC_CHANNEL\n  context->registerChannel(12, \"cuda_xth\", 
channel::cuda_xth::create());\n#endif // TP_USE_CUDA\n\n  return context;\n}\n\n} // namespace\n\nTEST(Context, ClientPingSerial) {\n  ForkedThreadPeerGroup pg;\n  pg.spawn(\n      [&]() {\n        std::vector<std::shared_ptr<void>> buffers;\n        std::promise<std::shared_ptr<Pipe>> serverPipePromise;\n        std::promise<Descriptor> readDescriptorPromise;\n        std::promise<void> readMessagePromise;\n\n        auto context = makeContext();\n\n        auto listener = context->listen(genUrls());\n        pg.send(PeerGroup::kClient, listener->url(\"uv\"));\n\n        listener->accept([&](const Error& error, std::shared_ptr<Pipe> pipe) {\n          if (error) {\n            serverPipePromise.set_exception(\n                std::make_exception_ptr(std::runtime_error(error.what())));\n          } else {\n            serverPipePromise.set_value(std::move(pipe));\n          }\n        });\n        std::shared_ptr<Pipe> serverPipe = serverPipePromise.get_future().get();\n\n        serverPipe->readDescriptor(\n            [&readDescriptorPromise](\n                const Error& error, Descriptor descriptor) {\n              if (error) {\n                readDescriptorPromise.set_exception(\n                    std::make_exception_ptr(std::runtime_error(error.what())));\n              } else {\n                readDescriptorPromise.set_value(std::move(descriptor));\n              }\n            });\n\n        Descriptor descriptor = readDescriptorPromise.get_future().get();\n        Allocation allocation = allocateForDescriptor(descriptor, buffers);\n        serverPipe->read(allocation, [&readMessagePromise](const Error& error) {\n          if (error) {\n            readMessagePromise.set_exception(\n                std::make_exception_ptr(std::runtime_error(error.what())));\n          } else {\n            readMessagePromise.set_value();\n          }\n        });\n        readMessagePromise.get_future().get();\n        
EXPECT_TRUE(descriptorAndAllocationMatchMessage(\n            descriptor, allocation, makeMessage(kNumPayloads, kNumTensors)));\n\n        pg.done(PeerGroup::kServer);\n        pg.join(PeerGroup::kServer);\n\n        context->join();\n      },\n      [&]() {\n        std::promise<void> writtenMessagePromise;\n\n        auto context = makeContext();\n\n        auto url = pg.recv(PeerGroup::kClient);\n        auto clientPipe = context->connect(url);\n\n        clientPipe->write(\n            makeMessage(kNumPayloads, kNumTensors),\n            [&writtenMessagePromise](const Error& error) {\n              if (error) {\n                writtenMessagePromise.set_exception(\n                    std::make_exception_ptr(std::runtime_error(error.what())));\n              } else {\n                writtenMessagePromise.set_value();\n              }\n            });\n        writtenMessagePromise.get_future().get();\n\n        pg.done(PeerGroup::kClient);\n        pg.join(PeerGroup::kClient);\n\n        context->join();\n      });\n}\n\nTEST(Context, ClientPingInline) {\n  ForkedThreadPeerGroup pg;\n  pg.spawn(\n      [&]() {\n        std::vector<std::shared_ptr<void>> buffers;\n        std::promise<std::shared_ptr<Pipe>> serverPipePromise;\n        std::promise<void> readCompletedProm;\n\n        auto context = makeContext();\n\n        auto listener = context->listen(genUrls());\n        pg.send(PeerGroup::kClient, listener->url(\"uv\"));\n\n        listener->accept([&](const Error& error, std::shared_ptr<Pipe> pipe) {\n          if (error) {\n            serverPipePromise.set_exception(\n                std::make_exception_ptr(std::runtime_error(error.what())));\n          } else {\n            serverPipePromise.set_value(std::move(pipe));\n          }\n        });\n        std::shared_ptr<Pipe> serverPipe = serverPipePromise.get_future().get();\n\n        serverPipe->readDescriptor([&serverPipe, &readCompletedProm, &buffers](\n                                       const 
Error& error,\n                                       Descriptor descriptor) {\n          if (error) {\n            ADD_FAILURE() << error.what();\n            readCompletedProm.set_value();\n            return;\n          }\n\n          Allocation allocation = allocateForDescriptor(descriptor, buffers);\n          serverPipe->read(\n              allocation,\n              [&readCompletedProm,\n               descriptor{std::move(descriptor)},\n               allocation](const Error& error) {\n                if (error) {\n                  readCompletedProm.set_exception(std::make_exception_ptr(\n                      std::runtime_error(error.what())));\n                } else {\n                  EXPECT_TRUE(descriptorAndAllocationMatchMessage(\n                      descriptor,\n                      allocation,\n                      makeMessage(kNumPayloads, kNumTensors)));\n                  readCompletedProm.set_value();\n                }\n              });\n        });\n        readCompletedProm.get_future().get();\n\n        pg.done(PeerGroup::kServer);\n        pg.join(PeerGroup::kServer);\n\n        context->join();\n      },\n      [&]() {\n        std::promise<void> writeCompletedProm;\n\n        auto context = makeContext();\n\n        auto url = pg.recv(PeerGroup::kClient);\n        auto clientPipe = context->connect(url);\n\n        clientPipe->write(\n            makeMessage(kNumPayloads, kNumTensors),\n            [&writeCompletedProm](const Error& error) {\n              if (error) {\n                writeCompletedProm.set_exception(\n                    std::make_exception_ptr(std::runtime_error(error.what())));\n              } else {\n                writeCompletedProm.set_value();\n              }\n            });\n        writeCompletedProm.get_future().get();\n\n        pg.done(PeerGroup::kClient);\n        pg.join(PeerGroup::kClient);\n\n        context->join();\n      });\n}\n\nTEST(Context, ServerPingPongTwice) {\n  
ForkedThreadPeerGroup pg;\n  pg.spawn(\n      [&]() {\n        std::vector<std::shared_ptr<void>> buffers;\n        std::promise<std::shared_ptr<Pipe>> serverPipePromise;\n        std::promise<void> pingCompletedProm;\n\n        auto context = makeContext();\n\n        auto listener = context->listen(genUrls());\n        pg.send(PeerGroup::kClient, listener->url(\"uv\"));\n\n        listener->accept([&](const Error& error, std::shared_ptr<Pipe> pipe) {\n          if (error) {\n            serverPipePromise.set_exception(\n                std::make_exception_ptr(std::runtime_error(error.what())));\n          } else {\n            serverPipePromise.set_value(std::move(pipe));\n          }\n        });\n        std::shared_ptr<Pipe> serverPipe = serverPipePromise.get_future().get();\n\n        int numPingsGoneThrough = 0;\n        for (int i = 0; i < 2; i++) {\n          serverPipe->write(\n              makeMessage(kNumPayloads, kNumTensors),\n              [&serverPipe,\n               &pingCompletedProm,\n               &buffers,\n               &numPingsGoneThrough,\n               i](const Error& error) {\n                if (error) {\n                  ADD_FAILURE() << error.what();\n                  pingCompletedProm.set_value();\n                  return;\n                }\n                serverPipe->readDescriptor(\n                    [&serverPipe,\n                     &pingCompletedProm,\n                     &buffers,\n                     &numPingsGoneThrough,\n                     i](const Error& error, Descriptor descriptor) {\n                      if (error) {\n                        ADD_FAILURE() << error.what();\n                        pingCompletedProm.set_value();\n                        return;\n                      }\n                      Allocation allocation =\n                          allocateForDescriptor(descriptor, buffers);\n                      serverPipe->read(\n                          allocation,\n                          
[&pingCompletedProm,\n                           &numPingsGoneThrough,\n                           descriptor{std::move(descriptor)},\n                           allocation,\n                           i](const Error& error) {\n                            if (error) {\n                              ADD_FAILURE() << error.what();\n                              pingCompletedProm.set_value();\n                              return;\n                            }\n                            EXPECT_TRUE(descriptorAndAllocationMatchMessage(\n                                descriptor,\n                                allocation,\n                                makeMessage(kNumPayloads, kNumTensors)));\n                            EXPECT_EQ(numPingsGoneThrough, i);\n                            numPingsGoneThrough++;\n                            if (numPingsGoneThrough == 2) {\n                              pingCompletedProm.set_value();\n                            }\n                          });\n                    });\n              });\n        }\n        pingCompletedProm.get_future().get();\n\n        pg.done(PeerGroup::kServer);\n        pg.join(PeerGroup::kServer);\n\n        context->join();\n      },\n      [&]() {\n        std::vector<std::shared_ptr<void>> buffers;\n        std::promise<void> pongCompletedProm;\n\n        auto context = makeContext();\n\n        auto url = pg.recv(PeerGroup::kClient);\n        auto clientPipe = context->connect(url);\n\n        int numPongsGoneThrough = 0;\n        for (int i = 0; i < 2; i++) {\n          clientPipe->readDescriptor([&clientPipe,\n                                      &pongCompletedProm,\n                                      &buffers,\n                                      &numPongsGoneThrough,\n                                      i](const Error& error,\n                                         Descriptor descriptor) {\n            if (error) {\n              ADD_FAILURE() << error.what();\n              
pongCompletedProm.set_value();\n              return;\n            }\n            Allocation allocation = allocateForDescriptor(descriptor, buffers);\n            clientPipe->read(\n                allocation,\n                [&clientPipe,\n                 &pongCompletedProm,\n                 &numPongsGoneThrough,\n                 descriptor{std::move(descriptor)},\n                 allocation,\n                 i](const Error& error) {\n                  if (error) {\n                    ADD_FAILURE() << error.what();\n                    pongCompletedProm.set_value();\n                    return;\n                  }\n\n                  // Copy received message to send it back.\n                  Message message =\n                      messageFromAllocation(descriptor, allocation);\n                  clientPipe->write(\n                      std::move(message),\n                      [&pongCompletedProm, &numPongsGoneThrough, i](\n                          const Error& error) {\n                        if (error) {\n                          ADD_FAILURE() << error.what();\n                          pongCompletedProm.set_value();\n                          return;\n                        }\n                        EXPECT_EQ(numPongsGoneThrough, i);\n                        numPongsGoneThrough++;\n                        if (numPongsGoneThrough == 2) {\n                          pongCompletedProm.set_value();\n                        }\n                      });\n                });\n          });\n        }\n        pongCompletedProm.get_future().get();\n\n        pg.done(PeerGroup::kClient);\n        pg.join(PeerGroup::kClient);\n\n        context->join();\n      });\n}\n\nstatic void pipeRead(\n    std::shared_ptr<Pipe>& pipe,\n    std::vector<std::shared_ptr<void>>& buffers,\n    std::function<void(const Error&, Descriptor, Allocation)> fn) {\n  pipe->readDescriptor([&pipe, &buffers, fn{std::move(fn)}](\n                           const Error& error, 
Descriptor descriptor) mutable {\n    ASSERT_FALSE(error);\n    Allocation allocation = allocateForDescriptor(descriptor, buffers);\n    pipe->read(\n        allocation,\n        [fn{std::move(fn)}, descriptor{std::move(descriptor)}, allocation](\n            const Error& error) mutable {\n          fn(error, std::move(descriptor), std::move(allocation));\n        });\n  });\n}\n\nTEST(Context, MixedTensorMessage) {\n  constexpr int kNumMessages = 2;\n\n  ForkedThreadPeerGroup pg;\n  pg.spawn(\n      [&]() {\n        std::vector<std::shared_ptr<void>> buffers;\n        std::promise<std::shared_ptr<Pipe>> serverPipePromise;\n        std::promise<void> readCompletedProm;\n\n        auto context = makeContext();\n\n        auto listener = context->listen(genUrls());\n        pg.send(PeerGroup::kClient, listener->url(\"uv\"));\n\n        listener->accept([&](const Error& error, std::shared_ptr<Pipe> pipe) {\n          if (error) {\n            serverPipePromise.set_exception(\n                std::make_exception_ptr(std::runtime_error(error.what())));\n          } else {\n            serverPipePromise.set_value(std::move(pipe));\n          }\n        });\n        std::shared_ptr<Pipe> serverPipe = serverPipePromise.get_future().get();\n\n        std::atomic<int> readNum(kNumMessages);\n        pipeRead(\n            serverPipe,\n            buffers,\n            [&readNum, &readCompletedProm](\n                const Error& error,\n                Descriptor descriptor,\n                Allocation allocation) {\n              ASSERT_FALSE(error);\n              EXPECT_TRUE(descriptorAndAllocationMatchMessage(\n                  descriptor,\n                  allocation,\n                  makeMessage(kNumPayloads, kNumTensors)));\n              if (--readNum == 0) {\n                readCompletedProm.set_value();\n              }\n            });\n        pipeRead(\n            serverPipe,\n            buffers,\n            [&readNum, &readCompletedProm](\n              
  const Error& error,\n                Descriptor descriptor,\n                Allocation allocation) {\n              ASSERT_FALSE(error);\n              EXPECT_TRUE(descriptorAndAllocationMatchMessage(\n                  descriptor, allocation, makeMessage(0, 0)));\n              if (--readNum == 0) {\n                readCompletedProm.set_value();\n              }\n            });\n        readCompletedProm.get_future().get();\n\n        pg.done(PeerGroup::kServer);\n        pg.join(PeerGroup::kServer);\n\n        context->join();\n      },\n      [&]() {\n        std::promise<void> writeCompletedProm;\n\n        auto context = makeContext();\n\n        auto url = pg.recv(PeerGroup::kClient);\n        auto clientPipe = context->connect(url);\n\n        std::atomic<int> writeNum(kNumMessages);\n        clientPipe->write(\n            makeMessage(kNumPayloads, kNumTensors),\n            [&writeNum, &writeCompletedProm](const Error& error) {\n              ASSERT_FALSE(error) << error.what();\n              if (--writeNum == 0) {\n                writeCompletedProm.set_value();\n              }\n            });\n        clientPipe->write(\n            makeMessage(0, 0),\n            [&writeNum, &writeCompletedProm](const Error& error) {\n              ASSERT_FALSE(error) << error.what();\n              if (--writeNum == 0) {\n                writeCompletedProm.set_value();\n              }\n            });\n        writeCompletedProm.get_future().get();\n\n        pg.done(PeerGroup::kClient);\n        pg.join(PeerGroup::kClient);\n\n        context->join();\n      });\n}\n"
  },
  {
    "path": "tensorpipe/test/core/listener_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/tensorpipe.h>\n\n#include <cstring>\n#include <exception>\n#include <future>\n#include <memory>\n#include <string>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\n\nTEST(Listener, ClosingAbortsOperations) {\n  auto context = std::make_shared<Context>();\n\n  context->registerTransport(0, \"uv\", transport::uv::create());\n  context->registerChannel(0, \"basic\", channel::basic::create());\n\n  {\n    auto listener = context->listen({\"uv://127.0.0.1\"});\n\n    std::promise<void> donePromise;\n    listener->accept(\n        [&](const Error& error, std::shared_ptr<Pipe> /* unused */) {\n          EXPECT_TRUE(error);\n          donePromise.set_value();\n        });\n    listener->close();\n    donePromise.get_future().get();\n  }\n\n  context->join();\n}\n"
  },
  {
    "path": "tensorpipe/test/core/pipe_cuda_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/core/pipe_test.h>\n\nusing namespace tensorpipe;\n\nclass CudaSimpleWriteReadWithAllTargetDevicesTest\n    : public ClientServerPipeTestCase {\n  InlineMessage imessage_ = {\n      .payloads =\n          {\n              {.data = \"payload #1\", .metadata = \"payload metadata #1\"},\n              {.data = \"payload #2\", .metadata = \"payload metadata #2\"},\n              {.data = \"payload #3\", .metadata = \"payload metadata #3\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1\",\n                  .metadata = \"tensor metadata #1\",\n                  .device = Device{kCudaDeviceType, 0},\n                  .targetDevice = Device{kCudaDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #2\",\n                  .metadata = \"tensor metadata #2\",\n                  .device = Device{kCpuDeviceType, 0},\n                  .targetDevice = Device{kCudaDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #3\",\n                  .metadata = \"tensor metadata #3\",\n                  .device = Device{kCudaDeviceType, 0},\n                  .targetDevice = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #4\",\n                  .metadata = \"tensor metadata #4\",\n                  .device = Device{kCpuDeviceType, 0},\n                  .targetDevice = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"pipe metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage storage;\n    std::tie(message, storage) = makeMessage(imessage_);\n    auto future = 
pipeWriteWithFuture(pipe, message);\n    future.get();\n  }\n\n  void client(Pipe& pipe) override {\n    Descriptor descriptor;\n    Storage storage;\n    auto future = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCudaDeviceType, 0},\n            Device{kCudaDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n        });\n    std::tie(descriptor, storage) = future.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, storage, imessage_);\n  }\n};\n\nTEST(Pipe, CudaSimpleWriteReadWithAllTargetDevices) {\n  CudaSimpleWriteReadWithAllTargetDevicesTest test;\n  test.run();\n}\n\nclass CudaSimpleWriteReadWithSomeTargetDevicesTest\n    : public ClientServerPipeTestCase {\n  InlineMessage imessage_ = {\n      .payloads =\n          {\n              {.data = \"payload #1\", .metadata = \"payload metadata #1\"},\n              {.data = \"payload #2\", .metadata = \"payload metadata #2\"},\n              {.data = \"payload #3\", .metadata = \"payload metadata #3\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1\",\n                  .metadata = \"tensor metadata #1\",\n                  .device = Device{kCudaDeviceType, 0},\n                  .targetDevice = Device{kCudaDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #2\",\n                  .metadata = \"tensor metadata #2\",\n                  .device = Device{kCudaDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #3\",\n                  .metadata = \"tensor metadata #3\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"pipe metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage storage;\n    std::tie(message, storage) = makeMessage(imessage_);\n    auto future = 
pipeWriteWithFuture(pipe, message);\n    future.get();\n  }\n\n  void client(Pipe& pipe) override {\n    Descriptor descriptor;\n    Storage storage;\n    auto future = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCudaDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n        });\n    std::tie(descriptor, storage) = future.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, storage, imessage_);\n  }\n};\n\nTEST(Pipe, CudaSimpleWriteReadWithSomeTargetDevices) {\n  CudaSimpleWriteReadWithSomeTargetDevicesTest test;\n  test.run();\n}\n\nclass CudaSimpleWriteReadWithoutTargetDeviceTest\n    : public ClientServerPipeTestCase {\n  InlineMessage imessage_ = {\n      .payloads =\n          {\n              {.data = \"payload #1\", .metadata = \"payload metadata #1\"},\n              {.data = \"payload #2\", .metadata = \"payload metadata #2\"},\n              {.data = \"payload #3\", .metadata = \"payload metadata #3\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1\",\n                  .metadata = \"tensor metadata #1\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #2\",\n                  .metadata = \"tensor metadata #2\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #3\",\n                  .metadata = \"tensor metadata #3\",\n                  .device = Device{kCudaDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #4\",\n                  .metadata = \"tensor metadata #4\",\n                  .device = Device{kCudaDeviceType, 0},\n              },\n          },\n      .metadata = \"pipe metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage storage;\n    
std::tie(message, storage) = makeMessage(imessage_);\n    auto future = pipeWriteWithFuture(pipe, message);\n    future.get();\n  }\n\n  void client(Pipe& pipe) override {\n    Descriptor descriptor;\n    Storage storage;\n    auto future = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n            Device{kCudaDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n            Device{kCudaDeviceType, 0},\n        });\n    std::tie(descriptor, storage) = future.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, storage, imessage_);\n  }\n};\n\nTEST(Pipe, CudaSimpleWriteReadWithoutTargetDevice) {\n  CudaSimpleWriteReadWithoutTargetDeviceTest test;\n  test.run();\n}\n"
  },
  {
    "path": "tensorpipe/test/core/pipe_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/core/pipe_test.h>\n\nusing namespace tensorpipe;\n\nclass SimpleWriteReadTest : public ClientServerPipeTestCase {\n  InlineMessage imessage_ = {\n      .payloads =\n          {\n              {.data = \"payload #1\", .metadata = \"payload metadata #1\"},\n              {.data = \"payload #2\", .metadata = \"payload metadata #2\"},\n              {.data = \"payload #3\", .metadata = \"payload metadata #3\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1\",\n                  .metadata = \"tensor metadata #1\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #2\",\n                  .metadata = \"tensor metadata #2\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #3\",\n                  .metadata = \"tensor metadata #3\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"pipe metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage storage;\n    std::tie(message, storage) = makeMessage(imessage_);\n    auto future = pipeWriteWithFuture(pipe, message);\n    future.get();\n  }\n\n  void client(Pipe& pipe) override {\n    Descriptor descriptor;\n    Storage storage;\n    auto future = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n        });\n    std::tie(descriptor, storage) = future.get();\n    
expectDescriptorAndStorageMatchMessage(descriptor, storage, imessage_);\n  }\n};\n\nTEST(Pipe, SimpleWriteRead) {\n  SimpleWriteReadTest test;\n  test.run();\n}\n\nclass SimpleWriteReadPayloadsOnlyTest : public ClientServerPipeTestCase {\n  InlineMessage imessage_ = {\n      .payloads =\n          {\n              {.data = \"payload #1\", .metadata = \"payload metadata #1\"},\n              {.data = \"payload #2\", .metadata = \"payload metadata #2\"},\n              {.data = \"payload #3\", .metadata = \"payload metadata #3\"},\n          },\n      .tensors = {},\n      .metadata = \"pipe metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage storage;\n    std::tie(message, storage) = makeMessage(imessage_);\n    auto future = pipeWriteWithFuture(pipe, message);\n    future.get();\n  }\n\n  void client(Pipe& pipe) override {\n    Descriptor descriptor;\n    Storage storage;\n    auto future = pipeReadWithFuture(pipe, /*targetDevices=*/{});\n    std::tie(descriptor, storage) = future.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, storage, imessage_);\n  }\n};\n\nTEST(Pipe, SimpleWriteReadPayloadsOnly) {\n  SimpleWriteReadPayloadsOnlyTest test;\n  test.run();\n}\n\nclass SimpleWriteReadTensorsOnlyTest : public ClientServerPipeTestCase {\n  InlineMessage imessage_ = {\n      .payloads = {},\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1\",\n                  .metadata = \"tensor metadata #1\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #2\",\n                  .metadata = \"tensor metadata #2\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #3\",\n                  .metadata = \"tensor metadata #3\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata 
= \"pipe metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage storage;\n    std::tie(message, storage) = makeMessage(imessage_);\n    auto future = pipeWriteWithFuture(pipe, message);\n    future.get();\n  }\n\n  void client(Pipe& pipe) override {\n    Descriptor descriptor;\n    Storage storage;\n    auto future = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n        });\n    std::tie(descriptor, storage) = future.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, storage, imessage_);\n  }\n};\n\nTEST(Pipe, SimpleWriteReadTensorsOnly) {\n  SimpleWriteReadTensorsOnlyTest test;\n  test.run();\n}\n\nclass SimpleWriteReadWithAllTargetDevicesTest\n    : public ClientServerPipeTestCase {\n  InlineMessage imessage_ = {\n      .payloads =\n          {\n              {.data = \"payload #1\", .metadata = \"payload metadata #1\"},\n              {.data = \"payload #2\", .metadata = \"payload metadata #2\"},\n              {.data = \"payload #3\", .metadata = \"payload metadata #3\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1\",\n                  .metadata = \"tensor metadata #1\",\n                  .device = Device{kCpuDeviceType, 0},\n                  .targetDevice = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #2\",\n                  .metadata = \"tensor metadata #2\",\n                  .device = Device{kCpuDeviceType, 0},\n                  .targetDevice = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #3\",\n                  .metadata = \"tensor metadata #3\",\n                  .device = Device{kCpuDeviceType, 0},\n                  .targetDevice = Device{kCpuDeviceType, 0},\n              },\n    
      },\n      .metadata = \"pipe metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage storage;\n    std::tie(message, storage) = makeMessage(imessage_);\n    auto future = pipeWriteWithFuture(pipe, message);\n    future.get();\n  }\n\n  void client(Pipe& pipe) override {\n    Descriptor descriptor;\n    Storage storage;\n    auto future = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n        });\n    std::tie(descriptor, storage) = future.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, storage, imessage_);\n  }\n};\n\nTEST(Pipe, SimpleWriteReadWithAllTargetDevices) {\n  SimpleWriteReadWithAllTargetDevicesTest test;\n  test.run();\n}\n\nclass SimpleWriteReadWithSomeTargetDevicesTest\n    : public ClientServerPipeTestCase {\n  InlineMessage imessage_ = {\n      .payloads =\n          {\n              {.data = \"payload #1\", .metadata = \"payload metadata #1\"},\n              {.data = \"payload #2\", .metadata = \"payload metadata #2\"},\n              {.data = \"payload #3\", .metadata = \"payload metadata #3\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1\",\n                  .metadata = \"tensor metadata #1\",\n                  .device = Device{kCpuDeviceType, 0},\n                  .targetDevice = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #2\",\n                  .metadata = \"tensor metadata #2\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n              {\n                  .data = \"tensor #3\",\n                  .metadata = \"tensor metadata #3\",\n                  .device = Device{kCpuDeviceType, 0},\n                  .targetDevice = Device{kCpuDeviceType, 0},\n              },\n          },\n      
.metadata = \"pipe metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage storage;\n    std::tie(message, storage) = makeMessage(imessage_);\n    auto future = pipeWriteWithFuture(pipe, message);\n    future.get();\n  }\n\n  void client(Pipe& pipe) override {\n    Descriptor descriptor;\n    Storage storage;\n    auto future = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n            Device{kCpuDeviceType, 0},\n        });\n    std::tie(descriptor, storage) = future.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, storage, imessage_);\n  }\n};\n\nTEST(Pipe, SimpleWriteReadWithSomeTargetDevices) {\n  SimpleWriteReadWithSomeTargetDevicesTest test;\n  test.run();\n}\n\nclass MultipleWriteReadTest : public ClientServerPipeTestCase {\n  InlineMessage imessage1_ = {\n      .payloads =\n          {\n              {.data = \"payload #1.1\", .metadata = \"payload metadata #1.1\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1.1\",\n                  .metadata = \"tensor metadata #1.1\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"message metadata\",\n  };\n\n  InlineMessage imessage2_ = {\n      .payloads =\n          {\n              {.data = \"payload #2.1\", .metadata = \"payload metadata #2.1\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #2.1\",\n                  .metadata = \"tensor metadata #2.1\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"message metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message1;\n    Storage storage1;\n    std::tie(message1, storage1) = makeMessage(imessage1_);\n    auto future1 = 
pipeWriteWithFuture(pipe, message1);\n\n    Message message2;\n    Storage storage2;\n    std::tie(message2, storage2) = makeMessage(imessage2_);\n    auto future2 = pipeWriteWithFuture(pipe, message2);\n\n    future1.get();\n    future2.get();\n  }\n\n  void client(Pipe& pipe) override {\n    auto future1 = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n        });\n    auto future2 = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n        });\n\n    Descriptor descriptor1;\n    Storage storage1;\n    std::tie(descriptor1, storage1) = future1.get();\n    expectDescriptorAndStorageMatchMessage(descriptor1, storage1, imessage1_);\n\n    Descriptor descriptor2;\n    Storage storage2;\n    std::tie(descriptor2, storage2) = future2.get();\n    expectDescriptorAndStorageMatchMessage(descriptor2, storage2, imessage2_);\n  }\n};\n\nTEST(Pipe, MultipleWriteRead) {\n  MultipleWriteReadTest test;\n  test.run();\n}\n\nclass MultipleWriteReadWithSomeTargetDevicesTest\n    : public ClientServerPipeTestCase {\n  InlineMessage imessage1_ = {\n      .payloads =\n          {\n              {.data = \"payload #1.1\", .metadata = \"payload metadata #1.1\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1.1\",\n                  .metadata = \"tensor metadata #1.1\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"message metadata\",\n  };\n\n  InlineMessage imessage2_ = {\n      .payloads =\n          {\n              {.data = \"payload #2.1\", .metadata = \"payload metadata #2.1\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #2.1\",\n                  .metadata = \"tensor metadata #2.1\",\n                  .device = Device{kCpuDeviceType, 0},\n                  .targetDevice = 
Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"message metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message1;\n    Storage storage1;\n    std::tie(message1, storage1) = makeMessage(imessage1_);\n    auto future1 = pipeWriteWithFuture(pipe, message1);\n\n    Message message2;\n    Storage storage2;\n    std::tie(message2, storage2) = makeMessage(imessage2_);\n    auto future2 = pipeWriteWithFuture(pipe, message2);\n\n    future1.get();\n    future2.get();\n  }\n\n  void client(Pipe& pipe) override {\n    auto future1 = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n        });\n    auto future2 = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n        });\n\n    Descriptor descriptor1;\n    Storage storage1;\n    std::tie(descriptor1, storage1) = future1.get();\n    expectDescriptorAndStorageMatchMessage(descriptor1, storage1, imessage1_);\n\n    Descriptor descriptor2;\n    Storage storage2;\n    std::tie(descriptor2, storage2) = future2.get();\n    expectDescriptorAndStorageMatchMessage(descriptor2, storage2, imessage2_);\n  }\n};\n\nTEST(Pipe, MultipleWriteReadWithSomeTargetDevices) {\n  MultipleWriteReadWithSomeTargetDevicesTest test;\n  test.run();\n}\n\nclass WriteFromBothThenReadTest : public ClientServerPipeTestCase {\n  InlineMessage imessage1_ = {\n      .payloads =\n          {\n              {.data = \"payload #1.1\", .metadata = \"payload metadata #1.1\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #1.1\",\n                  .metadata = \"tensor metadata #1.1\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"message metadata\",\n  };\n\n  InlineMessage imessage2_ = {\n      .payloads =\n          {\n              {.data = \"payload 
#2.1\", .metadata = \"payload metadata #2.1\"},\n          },\n      .tensors =\n          {\n              {\n                  .data = \"tensor #2.1\",\n                  .metadata = \"tensor metadata #2.1\",\n                  .device = Device{kCpuDeviceType, 0},\n              },\n          },\n      .metadata = \"message metadata\",\n  };\n\n public:\n  void server(Pipe& pipe) override {\n    Message message;\n    Storage writeStorage;\n    std::tie(message, writeStorage) = makeMessage(imessage1_);\n    auto writeFuture = pipeWriteWithFuture(pipe, message);\n\n    auto readFuture = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n        });\n\n    writeFuture.get();\n\n    Descriptor descriptor;\n    Storage readStorage;\n    std::tie(descriptor, readStorage) = readFuture.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, readStorage, imessage2_);\n  }\n\n  void client(Pipe& pipe) override {\n    Message message;\n    Storage writeStorage;\n    std::tie(message, writeStorage) = makeMessage(imessage2_);\n    auto writeFuture = pipeWriteWithFuture(pipe, message);\n\n    auto readFuture = pipeReadWithFuture(\n        pipe,\n        /*targetDevices=*/\n        {\n            Device{kCpuDeviceType, 0},\n        });\n\n    writeFuture.get();\n\n    Descriptor descriptor;\n    Storage readStorage;\n    std::tie(descriptor, readStorage) = readFuture.get();\n    expectDescriptorAndStorageMatchMessage(descriptor, readStorage, imessage1_);\n  }\n};\n\nTEST(Pipe, WriteFromBothThenRead) {\n  WriteFromBothThenReadTest test;\n  test.run();\n}\n"
  },
  {
    "path": "tensorpipe/test/core/pipe_test.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <future>\n#include <memory>\n#include <tuple>\n#include <vector>\n\n#include <gtest/gtest.h>\n\n#include <tensorpipe/tensorpipe.h>\n#include <tensorpipe/test/peer_group.h>\n\n#if TP_USE_CUDA\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/tensorpipe_cuda.h>\n#endif // TP_USE_CUDA\n\nstruct Storage {\n  std::vector<std::shared_ptr<void>> payloads;\n  std::vector<std::pair<std::shared_ptr<void>, tensorpipe::Buffer>> tensors;\n};\n\nstruct InlineMessage {\n  struct Payload {\n    std::string data;\n    std::string metadata;\n  };\n\n  struct Tensor {\n    std::string data;\n    std::string metadata;\n    tensorpipe::Device device;\n    tensorpipe::optional<tensorpipe::Device> targetDevice;\n  };\n\n  std::vector<Payload> payloads;\n  std::vector<Tensor> tensors;\n  std::string metadata;\n};\n\ninline std::pair<tensorpipe::Message, Storage> makeMessage(\n    InlineMessage imessage) {\n  tensorpipe::Message message;\n  Storage storage;\n\n  for (auto& payload : imessage.payloads) {\n    size_t length = payload.data.length();\n    auto data = std::unique_ptr<uint8_t, std::default_delete<uint8_t[]>>(\n        new uint8_t[length]);\n    std::memcpy(data.get(), &payload.data[0], length);\n    message.payloads.push_back({\n        .data = data.get(),\n        .length = length,\n        .metadata = payload.metadata,\n    });\n    storage.payloads.push_back(std::move(data));\n  }\n\n  for (auto& tensor : imessage.tensors) {\n    size_t length = tensor.data.length();\n    tensorpipe::Buffer buffer;\n    std::shared_ptr<void> data;\n    if (tensor.device.type == tensorpipe::kCpuDeviceType) {\n      data = std::unique_ptr<uint8_t, std::default_delete<uint8_t[]>>(\n          new uint8_t[length]);\n      
std::memcpy(data.get(), &tensor.data[0], length);\n      buffer = tensorpipe::CpuBuffer{.ptr = data.get()};\n#if TP_USE_CUDA\n    } else if (tensor.device.type == tensorpipe::kCudaDeviceType) {\n      void* cudaPtr;\n      TP_CUDA_CHECK(cudaSetDevice(tensor.device.index));\n      TP_CUDA_CHECK(cudaMalloc(&cudaPtr, length));\n      data = std::unique_ptr<void, std::function<void(void*)>>(\n          cudaPtr, [](void* ptr) { TP_CUDA_CHECK(cudaFree(ptr)); });\n      // TODO: Properly dispose of stream when done.\n      cudaStream_t stream;\n      TP_CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));\n      buffer = tensorpipe::CudaBuffer{\n          .ptr = data.get(),\n          .stream = stream,\n      };\n      TP_CUDA_CHECK(cudaMemcpyAsync(\n          cudaPtr, &tensor.data[0], length, cudaMemcpyDefault, stream));\n#endif // TP_USE_CUDA\n    } else {\n      ADD_FAILURE() << \"Unexpected source device: \" << tensor.device.toString();\n    }\n\n    message.tensors.push_back({\n        .buffer = buffer,\n        .length = length,\n        .targetDevice = tensor.targetDevice,\n        .metadata = tensor.metadata,\n    });\n    storage.tensors.push_back({std::move(data), std::move(buffer)});\n  }\n\n  message.metadata = imessage.metadata;\n\n  return {std::move(message), std::move(storage)};\n}\n\ninline std::pair<tensorpipe::Allocation, Storage> makeAllocation(\n    const tensorpipe::Descriptor& descriptor,\n    const std::vector<tensorpipe::Device>& devices) {\n  tensorpipe::Allocation allocation;\n  Storage storage;\n  for (const auto& payload : descriptor.payloads) {\n    auto data = std::unique_ptr<uint8_t, std::default_delete<uint8_t[]>>(\n        new uint8_t[payload.length]);\n    allocation.payloads.push_back({.data = data.get()});\n    storage.payloads.push_back(std::move(data));\n  }\n\n  TP_DCHECK(devices.size() == descriptor.tensors.size());\n  for (size_t tensorIdx = 0; tensorIdx < descriptor.tensors.size();\n       ++tensorIdx) {\n    
const auto& tensor = descriptor.tensors[tensorIdx];\n    tensorpipe::Device targetDevice = devices[tensorIdx];\n\n    if (tensor.targetDevice.has_value()) {\n      TP_DCHECK(targetDevice == *tensor.targetDevice);\n    }\n\n    if (targetDevice.type == tensorpipe::kCpuDeviceType) {\n      auto data = std::unique_ptr<uint8_t, std::default_delete<uint8_t[]>>(\n          new uint8_t[tensor.length]);\n      tensorpipe::Buffer buffer = tensorpipe::CpuBuffer{.ptr = data.get()};\n      allocation.tensors.push_back({.buffer = buffer});\n      storage.tensors.push_back({std::move(data), std::move(buffer)});\n#if TP_USE_CUDA\n    } else if (targetDevice.type == tensorpipe::kCudaDeviceType) {\n      void* cudaPtr;\n      TP_CUDA_CHECK(cudaSetDevice(targetDevice.index));\n      TP_CUDA_CHECK(cudaMalloc(&cudaPtr, tensor.length));\n      auto data = std::unique_ptr<void, std::function<void(void*)>>(\n          cudaPtr, [](void* ptr) { TP_CUDA_CHECK(cudaFree(ptr)); });\n      // TODO: Properly dispose of stream when done.\n      cudaStream_t stream;\n      TP_CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));\n      tensorpipe::Buffer buffer = tensorpipe::CudaBuffer{\n          .ptr = data.get(),\n          .stream = stream,\n      };\n      allocation.tensors.push_back({.buffer = buffer});\n      storage.tensors.push_back({std::move(data), std::move(buffer)});\n#endif // TP_USE_CUDA\n    } else {\n      ADD_FAILURE() << \"Unexpected target device: \" << targetDevice.toString();\n    }\n  }\n\n  return {std::move(allocation), std::move(storage)};\n}\n\ninline std::future<void> pipeWriteWithFuture(\n    tensorpipe::Pipe& pipe,\n    tensorpipe::Message message) {\n  auto promise = std::make_shared<std::promise<void>>();\n  auto future = promise->get_future();\n\n  pipe.write(\n      std::move(message),\n      [promise{std::move(promise)}](const tensorpipe::Error& error) {\n        if (error) {\n          promise->set_exception(\n              
std::make_exception_ptr(std::runtime_error(error.what())));\n          return;\n        }\n\n        promise->set_value();\n      });\n\n  return future;\n}\n\ninline std::future<std::tuple<tensorpipe::Descriptor, Storage>>\npipeReadWithFuture(\n    tensorpipe::Pipe& pipe,\n    std::vector<tensorpipe::Device> targetDevices) {\n  auto promise = std::make_shared<\n      std::promise<std::tuple<tensorpipe::Descriptor, Storage>>>();\n  auto future = promise->get_future();\n  pipe.readDescriptor([&pipe,\n                       promise{std::move(promise)},\n                       targetDevices{std::move(targetDevices)}](\n                          const tensorpipe::Error& error,\n                          tensorpipe::Descriptor descriptor) mutable {\n    if (error) {\n      promise->set_exception(\n          std::make_exception_ptr(std::runtime_error(error.what())));\n      return;\n    }\n\n    tensorpipe::Allocation allocation;\n    Storage storage;\n    std::tie(allocation, storage) = makeAllocation(descriptor, targetDevices);\n    pipe.read(\n        std::move(allocation),\n        [promise{std::move(promise)},\n         descriptor{std::move(descriptor)},\n         storage{std::move(storage)}](const tensorpipe::Error& error) mutable {\n          if (error) {\n            promise->set_exception(\n                std::make_exception_ptr(std::runtime_error(error.what())));\n            return;\n          }\n\n          promise->set_value(std::make_tuple<tensorpipe::Descriptor, Storage>(\n              std::move(descriptor), std::move(storage)));\n        });\n  });\n\n  return future;\n}\n\ninline void expectDescriptorAndStorageMatchMessage(\n    tensorpipe::Descriptor descriptor,\n    Storage storage,\n    InlineMessage imessage) {\n  EXPECT_EQ(imessage.metadata, descriptor.metadata);\n\n  EXPECT_EQ(descriptor.payloads.size(), storage.payloads.size());\n  EXPECT_EQ(imessage.payloads.size(), storage.payloads.size());\n  for (size_t idx = 0; idx < 
imessage.payloads.size(); ++idx) {\n    EXPECT_EQ(\n        imessage.payloads[idx].metadata, descriptor.payloads[idx].metadata);\n    EXPECT_EQ(\n        imessage.payloads[idx].data.length(), descriptor.payloads[idx].length);\n    EXPECT_EQ(\n        imessage.payloads[idx].data,\n        std::string(\n            static_cast<char*>(storage.payloads[idx].get()),\n            descriptor.payloads[idx].length));\n  }\n\n  EXPECT_EQ(descriptor.tensors.size(), storage.tensors.size());\n  EXPECT_EQ(imessage.tensors.size(), storage.tensors.size());\n  for (size_t idx = 0; idx < imessage.tensors.size(); ++idx) {\n    EXPECT_TRUE(\n        imessage.tensors[idx].device == descriptor.tensors[idx].sourceDevice);\n    EXPECT_EQ(imessage.tensors[idx].metadata, descriptor.tensors[idx].metadata);\n    EXPECT_EQ(\n        imessage.tensors[idx].targetDevice,\n        descriptor.tensors[idx].targetDevice);\n    const tensorpipe::Device& device = storage.tensors[idx].second.device();\n    EXPECT_TRUE(\n        !imessage.tensors[idx].targetDevice ||\n        imessage.tensors[idx].targetDevice == device);\n    size_t length = descriptor.tensors[idx].length;\n    EXPECT_EQ(imessage.tensors[idx].data.length(), length);\n    if (device.type == tensorpipe::kCpuDeviceType) {\n      const tensorpipe::CpuBuffer& buffer =\n          storage.tensors[idx].second.unwrap<tensorpipe::CpuBuffer>();\n      EXPECT_EQ(\n          imessage.tensors[idx].data,\n          std::string(static_cast<char*>(buffer.ptr), length));\n#if TP_USE_CUDA\n    } else if (device.type == tensorpipe::kCudaDeviceType) {\n      const tensorpipe::CudaBuffer& buffer =\n          storage.tensors[idx].second.unwrap<tensorpipe::CudaBuffer>();\n      std::string data(length, 0x0);\n      TP_CUDA_CHECK(cudaStreamSynchronize(buffer.stream));\n      TP_CUDA_CHECK(\n          cudaMemcpy(&data[0], buffer.ptr, length, cudaMemcpyDefault));\n      EXPECT_EQ(imessage.tensors[idx].data, data.data());\n#endif // TP_USE_CUDA\n    } else {\n     
 ADD_FAILURE() << \"Unexpected target device: \" << device.toString();\n    }\n  }\n}\n\ninline std::vector<std::string> genUrls() {\n  std::vector<std::string> res;\n\n#if TENSORPIPE_HAS_SHM_TRANSPORT\n  res.push_back(\"shm://\");\n#endif // TENSORPIPE_HAS_SHM_TRANSPORT\n  res.push_back(\"uv://127.0.0.1\");\n\n  return res;\n}\n\ninline std::shared_ptr<tensorpipe::Context> makeContext() {\n  auto context = std::make_shared<tensorpipe::Context>();\n\n  context->registerTransport(0, \"uv\", tensorpipe::transport::uv::create());\n#if TENSORPIPE_HAS_SHM_TRANSPORT\n  context->registerTransport(1, \"shm\", tensorpipe::transport::shm::create());\n#endif // TENSORPIPE_HAS_SHM_TRANSPORT\n  context->registerChannel(100, \"basic\", tensorpipe::channel::basic::create());\n#if TENSORPIPE_HAS_CMA_CHANNEL\n  context->registerChannel(101, \"cma\", tensorpipe::channel::cma::create());\n#endif // TENSORPIPE_HAS_CMA_CHANNEL\n\n#if TP_USE_CUDA\n  context->registerChannel(\n      10,\n      \"cuda_basic\",\n      tensorpipe::channel::cuda_basic::create(\n          tensorpipe::channel::basic::create()));\n#if TENSORPIPE_HAS_CUDA_IPC_CHANNEL\n  context->registerChannel(\n      11, \"cuda_ipc\", tensorpipe::channel::cuda_ipc::create());\n#endif // TENSORPIPE_HAS_CUDA_IPC_CHANNEL\n  context->registerChannel(\n      12, \"cuda_xth\", tensorpipe::channel::cuda_xth::create());\n#endif // TP_USE_CUDA\n\n  return context;\n}\n\nclass ClientServerPipeTestCase {\n  ForkedThreadPeerGroup pg_;\n\n public:\n  void run() {\n    pg_.spawn(\n        [&]() {\n          auto context = makeContext();\n\n          auto listener = context->listen(genUrls());\n          pg_.send(PeerGroup::kClient, listener->url(\"uv\"));\n\n          std::promise<std::shared_ptr<tensorpipe::Pipe>> promise;\n          listener->accept([&](const tensorpipe::Error& error,\n                               std::shared_ptr<tensorpipe::Pipe> pipe) {\n            if (error) {\n              promise.set_exception(\n                  
std::make_exception_ptr(std::runtime_error(error.what())));\n            } else {\n              promise.set_value(std::move(pipe));\n            }\n          });\n\n          std::shared_ptr<tensorpipe::Pipe> pipe = promise.get_future().get();\n          server(*pipe);\n\n          pg_.done(PeerGroup::kServer);\n          pg_.join(PeerGroup::kServer);\n\n          context->join();\n        },\n        [&]() {\n          auto context = makeContext();\n\n          auto url = pg_.recv(PeerGroup::kClient);\n          auto pipe = context->connect(url);\n\n          client(*pipe);\n\n          pg_.done(PeerGroup::kClient);\n          pg_.join(PeerGroup::kClient);\n\n          context->join();\n        });\n  }\n\n  virtual void client(tensorpipe::Pipe& pipe) = 0;\n  virtual void server(tensorpipe::Pipe& pipe) = 0;\n\n  virtual ~ClientServerPipeTestCase() = default;\n};\n"
  },
  {
    "path": "tensorpipe/test/peer_group.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <array>\n#include <string>\n#include <thread>\n\n#include <unistd.h>\n\n#include <gtest/gtest.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/queue.h>\n\nclass PeerGroup {\n public:\n  static constexpr int kNumPeers = 2;\n  static constexpr int kServer = 0;\n  static constexpr int kClient = 1;\n\n  virtual ~PeerGroup() = default;\n\n  // Send message to given peer.\n  virtual void send(int receiverId, const std::string&) = 0;\n\n  // Read next message for given peer. This method is blocking.\n  virtual std::string recv(int receiverId) = 0;\n\n  // Spawn two peers each running one of the provided functions.\n  virtual void spawn(std::function<void()>, std::function<void()>) = 0;\n\n  // Whether the two endpoints are two threads in the same process (as opposed\n  // to two separate processes).\n  virtual bool endpointsInSameProcess() const = 0;\n\n  // Signal other peers that this peer is done.\n  void done(int selfId) {\n    send(1 - selfId, doneString_);\n    std::unique_lock<std::mutex> lock(m_);\n    done_[selfId] = true;\n    condVar_[selfId].notify_one();\n  }\n\n  // Wait for all peers (including this one) to be done.\n  void join(int selfId) {\n    EXPECT_EQ(doneString_, recv(selfId));\n\n    std::unique_lock<std::mutex> lock(m_);\n    condVar_[selfId].wait(lock, [&] { return done_[selfId]; });\n  }\n\n private:\n  // This should be static but then we need to define it out-of-line (or mark it\n  // as inline once we can use C++-17).\n  const std::string doneString_ = \"done\";\n  std::mutex m_;\n  std::array<bool, kNumPeers> done_{{false, false}};\n  std::array<std::condition_variable, kNumPeers> condVar_;\n};\n\nclass ThreadPeerGroup : public PeerGroup {\n public:\n  
void send(int receiverId, const std::string& str) override {\n    q_[receiverId].push(str);\n  }\n\n  std::string recv(int receiverId) override {\n    return q_[receiverId].pop();\n  }\n\n  void spawn(std::function<void()> f1, std::function<void()> f2) override {\n    std::array<std::function<void()>, kNumPeers> fns = {\n        std::move(f1), std::move(f2)};\n    std::array<std::thread, kNumPeers> ts;\n\n    for (int peerId = 0; peerId < kNumPeers; ++peerId) {\n      ts[peerId] = std::thread(fns[peerId]);\n    }\n\n    for (auto& t : ts) {\n      t.join();\n    }\n  }\n\n  bool endpointsInSameProcess() const override {\n    return true;\n  }\n\n private:\n  std::array<tensorpipe::Queue<std::string>, kNumPeers> q_;\n};\n\nclass ForkedThreadPeerGroup : public ThreadPeerGroup {\n public:\n  void spawn(std::function<void()> f1, std::function<void()> f2) override {\n    // Some tests modify the global state of the process (such as initializing\n    // the CUDA context), which would cause other tests running as sub-processes\n    // to fail. Here, we run all thread-based tests in a sub-process to avoid\n    // this issue.\n    pid_t pid = fork();\n    TP_THROW_SYSTEM_IF(pid < 0, errno) << \"Failed to fork\";\n    if (pid == 0) {\n      ThreadPeerGroup::spawn(f1, f2);\n      std::exit(((testing::Test::HasFailure()) ? 
1 : 0));\n    }\n\n    int status;\n    TP_THROW_SYSTEM_IF(waitpid(pid, &status, 0) < 0, errno)\n        << \"Failed to wait for child test process\";\n    EXPECT_TRUE(WIFEXITED(status));\n    if (WIFSIGNALED(status)) {\n      TP_LOG_WARNING() << \"Test process terminated with signal \"\n                       << WTERMSIG(status);\n    }\n    const int exitStatus = WEXITSTATUS(status);\n    EXPECT_EQ(0, exitStatus);\n  }\n};\n\nclass ProcessPeerGroup : public PeerGroup {\n public:\n  void send(int receiverId, const std::string& str) override {\n    uint64_t len = str.length();\n\n    int ret;\n\n    ret = write(pipefd_[receiverId][kWriteEnd], &len, sizeof(len));\n    TP_THROW_SYSTEM_IF(ret < 0, errno) << \"Failed to write to pipe\";\n    EXPECT_EQ(sizeof(len), ret);\n\n    ret = write(pipefd_[receiverId][kWriteEnd], str.data(), len);\n    TP_THROW_SYSTEM_IF(ret < 0, errno) << \"Failed to write to pipe\";\n    EXPECT_EQ(len, ret);\n  }\n\n  std::string recv(int receiverId) override {\n    int ret;\n\n    uint64_t len;\n    ret = read(pipefd_[receiverId][kReadEnd], &len, sizeof(len));\n    TP_THROW_SYSTEM_IF(ret < 0, errno) << \"Failed to read from pipe\";\n    EXPECT_EQ(sizeof(len), ret);\n\n    std::string str(len, 0);\n    ret = read(pipefd_[receiverId][kReadEnd], &str[0], len);\n    TP_THROW_SYSTEM_IF(ret < 0, errno) << \"Failed to read from pipe\";\n    EXPECT_EQ(len, ret);\n\n    return str;\n  }\n\n  void spawn(std::function<void()> f1, std::function<void()> f2) override {\n    std::array<std::function<void()>, kNumPeers> fns = {\n        std::move(f1), std::move(f2)};\n    std::array<pid_t, kNumPeers> pids = {-1, -1};\n\n    for (int peerId = 0; peerId < kNumPeers; ++peerId) {\n      TP_THROW_SYSTEM_IF(pipe(pipefd_[peerId].data()) < 0, errno)\n          << \"Failed to create pipe\";\n    }\n\n    for (int peerId = 0; peerId < kNumPeers; ++peerId) {\n      pids[peerId] = fork();\n      TP_THROW_SYSTEM_IF(pids[peerId] < 0, errno) << \"Failed to fork\";\n      
if (pids[peerId] == 0) {\n        try {\n          // Close writing end of our pipe.\n          TP_THROW_SYSTEM_IF(close(pipefd_[peerId][kWriteEnd]) < 0, errno)\n              << \"Failed to close fd\";\n          // Close reading end of other pipe.\n          TP_THROW_SYSTEM_IF(close(pipefd_[1 - peerId][kReadEnd]) < 0, errno)\n              << \"Failed to close fd\";\n\n          fns[peerId]();\n        } catch (const std::exception& e) {\n          TP_LOG_ERROR() << \"Child #\" << peerId << \" (PID \" << getpid()\n                         << \") encountered exception \" << e.what();\n          std::exit(2);\n        } catch (...) {\n          std::exit(3);\n        }\n        std::exit(((testing::Test::HasFailure()) ? 1 : 0));\n      }\n    }\n\n    // Close all pipes in parent process.\n    for (int peerId = 0; peerId < kNumPeers; ++peerId) {\n      for (int pipeEnd = 0; pipeEnd < 2; ++pipeEnd) {\n        TP_THROW_SYSTEM_IF(close(pipefd_[peerId][pipeEnd]) < 0, errno)\n            << \"Failed to close fd\";\n      }\n    }\n\n    for (int peerId = 0; peerId < kNumPeers; ++peerId) {\n      int status;\n      TP_THROW_SYSTEM_IF(waitpid(-1, &status, 0) < 0, errno)\n          << \"Failed to wait for child process\";\n      EXPECT_TRUE(WIFEXITED(status));\n      if (WIFSIGNALED(status)) {\n        TP_LOG_WARNING() << \"Peer process terminated with signal \"\n                         << WTERMSIG(status);\n      }\n      const int exitStatus = WEXITSTATUS(status);\n      EXPECT_EQ(0, exitStatus);\n    }\n  }\n\n  bool endpointsInSameProcess() const override {\n    return false;\n  }\n\n private:\n  static constexpr int kReadEnd = 0;\n  static constexpr int kWriteEnd = 1;\n\n  std::array<std::array<int, 2>, kNumPeers> pipefd_;\n};\n"
  },
  {
    "path": "tensorpipe/test/python/tensorpipe.py",
    "content": "#!/usr/bin/env python3\n# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the BSD-style license found in the\n# LICENSE file in the root directory of this source tree.\n\nimport threading\nimport unittest\n\nimport pytensorpipe as tp\n\n\nclass TestTensorpipe(unittest.TestCase):\n    def test_read_write(self):\n        context = tp.Context()\n        context.register_transport(0, \"tcp\", tp.create_uv_transport())\n        create_shm_transport = getattr(tp, \"create_shm_transport\", None)\n        if create_shm_transport is not None:\n            context.register_transport(-1, \"shm\", create_shm_transport())\n        context.register_channel(0, \"basic\", tp.create_basic_channel())\n        create_cma_channel = getattr(tp, \"create_cma_channel\", None)\n        if create_cma_channel is not None:\n            context.register_channel(-1, \"cma\", create_cma_channel())\n\n        # We must keep a reference to it, or it will be destroyed early.\n        server_pipe = None\n\n        listener: tp.Listener = context.listen([\"tcp://127.0.0.1\"])\n\n        write_completed = threading.Event()\n\n        def on_connection(pipe: tp.Pipe) -> None:\n            global server_pipe\n            payload = tp.OutgoingPayload(b\"Hello \", b\"a greeting\")\n            tensor = tp.OutgoingTensor(b\"World!\", b\"a place\")\n            message = tp.OutgoingMessage(b\"metadata\", [payload], [tensor])\n            pipe.write(message, on_write)\n            server_pipe = pipe\n\n        def on_write() -> None:\n            write_completed.set()\n\n        listener.listen(on_connection)\n\n        client_pipe: tp.Pipe = context.connect(listener.get_url(\"tcp\"))\n\n        received_payloads = None\n        received_tensors = None\n        read_completed = threading.Event()\n\n        def on_read_descriptor(message: tp.IncomingMessage) -> None:\n            nonlocal received_payloads, received_tensors\n 
           self.assertEqual(message.metadata, bytearray(b\"metadata\"))\n            received_payloads = []\n            for payload in message.payloads:\n                self.assertEqual(payload.metadata, bytearray(b\"a greeting\"))\n                received_payloads.append(bytearray(payload.length))\n                payload.buffer = received_payloads[-1]\n            received_tensors = []\n            for tensor in message.tensors:\n                self.assertEqual(tensor.metadata, bytearray(b\"a place\"))\n                received_tensors.append(bytearray(tensor.length))\n                tensor.buffer = received_tensors[-1]\n            client_pipe.read(message, on_read)\n\n        def on_read() -> None:\n            read_completed.set()\n\n        client_pipe.read_descriptor(on_read_descriptor)\n\n        write_completed.wait()\n        read_completed.wait()\n\n        self.assertEqual(received_payloads, [bytearray(b\"Hello \")])\n        self.assertEqual(received_tensors, [bytearray(b\"World!\")])\n\n        # Due to a current limitation we're not releasing the GIL when calling\n        # the context's destructor, which implicitly calls join, which may fire\n        # some callbacks that also try to acquire the GIL and thus deadlock.\n        # So, for now, we must explicitly call join.\n        # See https://github.com/pybind/pybind11/issues/1446.\n        context.join()\n\n\nif __name__ == \"__main__\":\n    unittest.main()\n"
  },
  {
    "path": "tensorpipe/test/test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <signal.h>\n\n// One-time init to use EPIPE errors instead of SIGPIPE\nnamespace {\n\nstruct Initializer {\n  explicit Initializer() {\n    signal(SIGPIPE, SIG_IGN);\n  }\n};\n\nInitializer initializer;\n\n} // namespace\n"
  },
  {
    "path": "tensorpipe/test/test_environment.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/test_environment.h>\n\n#if TP_USE_CUDA\n#include <cuda_runtime.h>\n#include <sys/types.h>\n#include <sys/wait.h>\n#include <tensorpipe/common/cuda.h>\n#include <tensorpipe/common/defs.h>\n#include <unistd.h>\n#endif // TP_USE_CUDA\n\nint TestEnvironment::numCudaDevices() {\n  static int count = -1;\n  if (count == -1) {\n#if TP_USE_CUDA\n    pid_t pid = fork();\n    TP_THROW_SYSTEM_IF(pid < 0, errno) << \"Failed to fork\";\n    if (pid == 0) {\n      int res;\n      TP_CUDA_CHECK(cudaGetDeviceCount(&res));\n      std::exit(res);\n    } else {\n      int status;\n      TP_THROW_SYSTEM_IF(waitpid(pid, &status, 0) < 0, errno)\n          << \"Failed to wait for child process\";\n      TP_THROW_ASSERT_IF(!WIFEXITED(status));\n      count = WEXITSTATUS(status);\n    }\n#else // TP_USE_CUDA\n    count = 0;\n#endif // TP_USE_CUDA\n  }\n\n  return count;\n}\n"
  },
  {
    "path": "tensorpipe/test/test_environment.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\nclass TestEnvironment {\n public:\n  static int numCudaDevices();\n};\n"
  },
  {
    "path": "tensorpipe/test/transport/connection_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/transport_test.h>\n\n#include <array>\n\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::transport;\n\nTEST_P(TransportTest, Connection_Initialization) {\n  constexpr size_t numBytes = 13;\n  std::array<char, numBytes> garbage;\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        doRead(\n            conn,\n            [&](const Error& error, const void* /* unused */, size_t len) {\n              ASSERT_FALSE(error) << error.what();\n              ASSERT_EQ(len, garbage.size());\n              peers_->done(PeerGroup::kServer);\n            });\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        doWrite(conn, garbage.data(), garbage.size(), [&](const Error& error) {\n          ASSERT_FALSE(error) << error.what();\n          peers_->done(PeerGroup::kClient);\n        });\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nTEST_P(TransportTest, Connection_InitializationError) {\n  int numRequests = 10;\n\n  testConnection(\n      [&](std::shared_ptr<Connection> /* unused */) {\n        // Closes connection\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        for (int i = 0; i < numRequests; i++) {\n          std::promise<void> readCompletedProm;\n          doRead(\n              conn,\n              [&, conn](\n                  const Error& error,\n                  const void* /* unused */,\n                  size_t /* unused */) {\n                ASSERT_TRUE(error);\n                readCompletedProm.set_value();\n              });\n          readCompletedProm.get_future().wait();\n        }\n      });\n}\n\n// Disabled 
because no one really knows what this test was meant to check.\nTEST_P(TransportTest, DISABLED_Connection_DestroyConnectionFromCallback) {\n  testConnection(\n      [&](std::shared_ptr<Connection> /* unused */) {\n        // Closes connection\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        // This should be the only connection instance.\n        EXPECT_EQ(conn.use_count(), 1);\n        // Move connection instance to lambda scope, so we can destroy\n        // the only instance we have from the callback itself. This\n        // tests that the transport keeps the connection alive as long\n        // as it's executing a callback.\n        doRead(\n            conn,\n            [conn](\n                const Error& /* unused */,\n                const void* /* unused */,\n                size_t /* unused */) mutable {\n              // Destroy connection from within callback.\n              EXPECT_GT(conn.use_count(), 1);\n              conn.reset();\n            });\n      });\n}\n\nnamespace {\n\nstruct MyNopType {\n  uint32_t myIntField;\n  NOP_STRUCTURE(MyNopType, myIntField);\n};\n\n} // namespace\n\nTEST_P(TransportTest, Connection_NopWrite) {\n  constexpr size_t kSize = 0x42;\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        auto holder = std::make_shared<NopHolder<MyNopType>>();\n        MyNopType& object = holder->getObject();\n        conn->read(*holder, [&, conn, holder](const Error& error) {\n          ASSERT_FALSE(error) << error.what();\n          ASSERT_EQ(object.myIntField, kSize);\n          peers_->done(PeerGroup::kServer);\n        });\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        auto holder = std::make_shared<NopHolder<MyNopType>>();\n        MyNopType& object = holder->getObject();\n        object.myIntField = kSize;\n        conn->write(*holder, [&, conn, holder](const Error& error) {\n          ASSERT_FALSE(error) << error.what();\n       
   peers_->done(PeerGroup::kClient);\n        });\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nTEST_P(TransportTest, Connection_QueueWritesBeforeReads) {\n  constexpr int kMsgSize = 16 * 1024;\n  constexpr int numMsg = 10;\n  const std::string kReady = \"ready\";\n  std::string msg[numMsg];\n\n  for (int i = 0; i < numMsg; i++) {\n    msg[i] = std::string(kMsgSize, static_cast<char>(i));\n  }\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        for (int i = 0; i < numMsg; i++) {\n          doWrite(\n              conn,\n              msg[i].c_str(),\n              msg[i].length(),\n              [&, conn, i](const Error& error) {\n                ASSERT_FALSE(error) << error.what();\n                if (i == numMsg - 1) {\n                  peers_->send(PeerGroup::kClient, kReady);\n                  peers_->done(PeerGroup::kServer);\n                }\n              });\n        }\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        ASSERT_EQ(kReady, peers_->recv(PeerGroup::kClient));\n        for (int i = 0; i < numMsg; i++) {\n          doRead(\n              conn,\n              [&, conn, i](const Error& error, const void* data, size_t len) {\n                ASSERT_FALSE(error) << error.what();\n                ASSERT_EQ(len, msg[i].length());\n                const char* cdata = (const char*)data;\n                for (int j = 0; j < len; ++j) {\n                  const char c = cdata[j];\n                  ASSERT_EQ(c, msg[i][j]) << \"Wrong value at position \" << j\n                                          << \" of \" << msg[i].length();\n                }\n                if (i == numMsg - 1) {\n                  peers_->done(PeerGroup::kClient);\n                }\n              });\n        }\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\n// TODO: Enable this test when uv transport could handle\nTEST_P(TransportTest, 
DISABLED_Connection_EmptyBuffer) {\n  constexpr size_t numBytes = 13;\n  std::array<char, numBytes> garbage;\n  int ioNum = 100;\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        std::atomic<int> n(ioNum);\n        for (int i = 0; i < ioNum; i++) {\n          if (i % 2 == 0) {\n            // Empty buffer\n            doRead(\n                conn,\n                nullptr,\n                0,\n                [&, conn](const Error& error, const void* ptr, size_t len) {\n                  ASSERT_FALSE(error) << error.what();\n                  ASSERT_EQ(len, 0);\n                  ASSERT_EQ(ptr, nullptr);\n                  if (--n == 0) {\n                    peers_->done(PeerGroup::kServer);\n                  }\n                });\n          } else {\n            // Garbage buffer\n            doRead(\n                conn,\n                [&, conn](\n                    const Error& error, const void* /* unused */, size_t len) {\n                  ASSERT_FALSE(error) << error.what();\n                  ASSERT_EQ(len, garbage.size());\n                  if (--n == 0) {\n                    peers_->done(PeerGroup::kServer);\n                  }\n                });\n          }\n        }\n\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        std::atomic<int> n(ioNum);\n        for (int i = 0; i < ioNum; i++) {\n          if ((i & 1) == 0) {\n            // Empty buffer\n            doWrite(conn, nullptr, 0, [&, conn](const Error& error) {\n              ASSERT_FALSE(error) << error.what();\n              if (--n == 0) {\n                peers_->done(PeerGroup::kClient);\n              }\n            });\n          } else {\n            // Garbage buffer\n            doWrite(\n                conn,\n                garbage.data(),\n                garbage.size(),\n                [&, conn](const Error& error) {\n                  ASSERT_FALSE(error) << error.what();\n             
     if (--n == 0) {\n                    peers_->done(PeerGroup::kClient);\n                  }\n                });\n          }\n        }\n\n        peers_->join(PeerGroup::kClient);\n      });\n}\n"
  },
  {
    "path": "tensorpipe/test/transport/context_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/transport_test.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::transport;\n\nTEST_P(TransportTest, Context_Basics) {\n  auto context = GetParam()->getContext();\n  auto addr = GetParam()->defaultAddr();\n\n  {\n    std::mutex mutex;\n    std::condition_variable cv;\n    std::vector<std::shared_ptr<Connection>> connections;\n\n    // Listener runs callback for every new connection.\n    auto listener = context->listen(addr);\n    listener->accept(\n        [&](const Error& error, std::shared_ptr<Connection> connection) {\n          ASSERT_FALSE(error) << error.what();\n          std::lock_guard<std::mutex> lock(mutex);\n          connections.push_back(std::move(connection));\n          cv.notify_one();\n        });\n\n    // Connect to listener.\n    auto conn = context->connect(listener->addr());\n\n    // Wait for new connection\n    {\n      std::unique_lock<std::mutex> lock(mutex);\n      while (connections.empty()) {\n        cv.wait(lock);\n      }\n    }\n  }\n\n  context->join();\n}\n\nTEST_P(TransportTest, Context_DomainDescriptor) {\n  auto context = GetParam()->getContext();\n\n  {\n    const auto& domainDescriptor = context->domainDescriptor();\n    EXPECT_FALSE(domainDescriptor.empty());\n  }\n\n  context->join();\n}\n"
  },
  {
    "path": "tensorpipe/test/transport/ibv/connection_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/ibv/ibv_test.h>\n\n#include <gtest/gtest.h>\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::transport;\n\nnamespace {\n\nclass IbvTransportTest : public TransportTest {};\n\nIbvTransportTestHelper helper;\n\n// This value is defined in tensorpipe/transport/ibv/connection.h\nstatic constexpr auto kBufferSize = 2 * 1024 * 1024;\n\n} // namespace\n\nTEST_P(IbvTransportTest, Chunking) {\n  // This is larger than the default ring buffer size.\n  const int kMsgSize = 5 * kBufferSize;\n  std::string srcBuf(kMsgSize, 0x42);\n  auto dstBuf = std::make_unique<char[]>(kMsgSize);\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        doRead(\n            conn,\n            dstBuf.get(),\n            kMsgSize,\n            [&, conn](const Error& error, const void* ptr, size_t len) {\n              ASSERT_FALSE(error) << error.what();\n              ASSERT_EQ(len, kMsgSize);\n              ASSERT_EQ(ptr, dstBuf.get());\n              for (int i = 0; i < kMsgSize; ++i) {\n                ASSERT_EQ(dstBuf[i], srcBuf[i]);\n              }\n              peers_->done(PeerGroup::kServer);\n            });\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        doWrite(\n            conn,\n            srcBuf.c_str(),\n            srcBuf.length(),\n            [&, conn](const Error& error) {\n              ASSERT_FALSE(error) << error.what();\n              peers_->done(PeerGroup::kClient);\n            });\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nTEST_P(IbvTransportTest, ChunkingImplicitRead) {\n  // This is larger than the default ring buffer 
size.\n  const size_t kMsgSize = 5 * kBufferSize;\n  std::string msg(kMsgSize, 0x42);\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        doRead(\n            conn, [&, conn](const Error& error, const void* ptr, size_t len) {\n              ASSERT_FALSE(error) << error.what();\n              ASSERT_EQ(len, kMsgSize);\n              for (int i = 0; i < kMsgSize; ++i) {\n                ASSERT_EQ(static_cast<const uint8_t*>(ptr)[i], msg[i]);\n              }\n              peers_->done(PeerGroup::kServer);\n            });\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        doWrite(conn, msg.c_str(), msg.length(), [&, conn](const Error& error) {\n          ASSERT_FALSE(error) << error.what();\n          peers_->done(PeerGroup::kClient);\n        });\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nTEST_P(IbvTransportTest, QueueWrites) {\n  // This is large enough that two of those will not fit in the ring buffer at\n  // the same time.\n  constexpr int numMsg = 2;\n  constexpr size_t numBytes = (3 * kBufferSize) / 4;\n  const std::string kReady = \"ready\";\n  std::array<char, numBytes> garbage;\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        // Wait for peer to queue up writes before attempting to read\n        EXPECT_EQ(kReady, peers_->recv(PeerGroup::kServer));\n\n        for (int i = 0; i < numMsg; ++i) {\n          doRead(\n              conn,\n              [&, conn, i](const Error& error, const void* ptr, size_t len) {\n                ASSERT_FALSE(error) << error.what();\n                ASSERT_EQ(len, numBytes);\n                if (i == numMsg - 1) {\n                  peers_->done(PeerGroup::kServer);\n                }\n              });\n        }\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        for (int i = 0; i < numMsg; ++i) {\n          doWrite(\n              conn,\n     
         garbage.data(),\n              garbage.size(),\n              [&, conn, i](const Error& error) {\n                ASSERT_FALSE(error) << error.what();\n                if (i == numMsg - 1) {\n                  peers_->done(PeerGroup::kClient);\n                }\n              });\n        }\n        peers_->send(PeerGroup::kServer, kReady);\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nnamespace {\n\nstruct MyNopType {\n  std::string myStringField;\n  NOP_STRUCTURE(MyNopType, myStringField);\n};\n\n} // namespace\n\nTEST_P(IbvTransportTest, NopWriteWrapAround) {\n  constexpr int numMsg = 2;\n  constexpr size_t kSize = (3 * kBufferSize) / 4;\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        for (int i = 0; i < numMsg; ++i) {\n          auto holder = std::make_shared<NopHolder<MyNopType>>();\n          conn->read(*holder, [&, conn, holder, i](const Error& error) {\n            ASSERT_FALSE(error) << error.what();\n            ASSERT_EQ(holder->getObject().myStringField.length(), kSize);\n            if (i == numMsg - 1) {\n              peers_->done(PeerGroup::kServer);\n            }\n          });\n        }\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        for (int i = 0; i < numMsg; ++i) {\n          auto holder = std::make_shared<NopHolder<MyNopType>>();\n          holder->getObject().myStringField = std::string(kSize, 'B');\n          conn->write(*holder, [&, conn, holder, i](const Error& error) {\n            ASSERT_FALSE(error) << error.what();\n            if (i == numMsg - 1) {\n              peers_->done(PeerGroup::kClient);\n            }\n          });\n        }\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nINSTANTIATE_TEST_CASE_P(Ibv, IbvTransportTest, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/ibv/context_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/ibv/ibv_test.h>\n#include <tensorpipe/transport/ibv/utility.h>\n\n#include <gtest/gtest.h>\n\nnamespace {\n\nclass IbvTransportContextTest : public TransportTest {};\n\nIbvTransportTestHelper helper;\n\n} // namespace\n\nusing namespace tensorpipe;\n\n// Linux-only because OSX machines on CircleCI cannot resolve their hostname\n#ifdef __linux__\nTEST_P(IbvTransportContextTest, LookupHostnameAddress) {\n  Error error;\n  std::string addr;\n  std::tie(error, addr) = transport::ibv::lookupAddrForHostname();\n  EXPECT_FALSE(error) << error.what();\n  EXPECT_NE(addr, \"\");\n}\n#endif\n\n// Interface name conventions change based on platform. Linux uses \"lo\", OSX\n// uses lo0, Windows uses integers.\n#ifdef __linux__\n#define LOOPBACK_INTERFACE \"lo\"\n#elif __APPLE__\n#define LOOPBACK_INTERFACE \"lo0\"\n#endif\n\n#ifdef LOOPBACK_INTERFACE\nTEST_P(IbvTransportContextTest, LookupInterfaceAddress) {\n  Error error;\n  std::string addr;\n  std::tie(error, addr) =\n      transport::ibv::lookupAddrForIface(LOOPBACK_INTERFACE);\n  EXPECT_FALSE(error) << error.what();\n  EXPECT_NE(addr, \"\");\n}\n#endif\n\nINSTANTIATE_TEST_CASE_P(\n    Ibv,\n    IbvTransportContextTest,\n    ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/ibv/ibv_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/ibv/ibv_test.h>\n\nnamespace {\n\nIbvTransportTestHelper helper;\n\n} // namespace\n\nINSTANTIATE_TEST_CASE_P(Ibv, TransportTest, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/ibv/ibv_test.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/test/transport/transport_test.h>\n#include <tensorpipe/transport/ibv/factory.h>\n\nclass IbvTransportTestHelper : public TransportTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::transport::Context> getContextInternal()\n      override {\n    return tensorpipe::transport::ibv::create();\n  }\n\n public:\n  std::string defaultAddr() override {\n    return \"127.0.0.1\";\n  }\n};\n"
  },
  {
    "path": "tensorpipe/test/transport/ibv/sockaddr_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/sockaddr.h>\n\n#include <netinet/in.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe::transport;\n\nnamespace {\n\nint family(const ibv::Sockaddr& addr) {\n  auto sockaddr = addr.addr();\n  return sockaddr->sa_family;\n}\n\nint port(const ibv::Sockaddr& addr) {\n  auto sockaddr = addr.addr();\n  if (sockaddr->sa_family == AF_INET) {\n    auto in = reinterpret_cast<const struct sockaddr_in*>(sockaddr);\n    return in->sin_port;\n  }\n  if (sockaddr->sa_family == AF_INET6) {\n    auto in6 = reinterpret_cast<const struct sockaddr_in6*>(sockaddr);\n    return in6->sin6_port;\n  }\n  return -1;\n}\n\n} // namespace\n\nTEST(IbvSockaddr, InetBadPort) {\n  ASSERT_THROW(\n      ibv::Sockaddr::createInetSockAddr(\"1.2.3.4:-1\"), std::invalid_argument);\n  ASSERT_THROW(\n      ibv::Sockaddr::createInetSockAddr(\"1.2.3.4:65536\"),\n      std::invalid_argument);\n}\n\nTEST(IbvSockaddr, Inet) {\n  {\n    auto sa = ibv::Sockaddr::createInetSockAddr(\"1.2.3.4:5\");\n    ASSERT_EQ(family(sa), AF_INET);\n    ASSERT_EQ(port(sa), ntohs(5));\n    ASSERT_EQ(sa.str(), \"1.2.3.4:5\");\n  }\n\n  {\n    auto sa = ibv::Sockaddr::createInetSockAddr(\"1.2.3.4:0\");\n    ASSERT_EQ(family(sa), AF_INET);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"1.2.3.4:0\");\n  }\n\n  {\n    auto sa = ibv::Sockaddr::createInetSockAddr(\"1.2.3.4\");\n    ASSERT_EQ(family(sa), AF_INET);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"1.2.3.4:0\");\n  }\n}\n\nTEST(IbvSockaddr, Inet6BadPort) {\n  ASSERT_THROW(\n      ibv::Sockaddr::createInetSockAddr(\"[::1]:-1\"), std::invalid_argument);\n  ASSERT_THROW(\n      ibv::Sockaddr::createInetSockAddr(\"[::1]:65536\"), std::invalid_argument);\n  ASSERT_THROW(\n    
  ibv::Sockaddr::createInetSockAddr(\"]::1[\"), std::invalid_argument);\n}\n\n// Interface name conventions change based on platform. Linux uses \"lo\", OSX\n// uses lo0, Windows uses integers.\n#ifdef __linux__\n#define LOOPBACK_INTERFACE \"lo\"\n#elif __APPLE__\n#define LOOPBACK_INTERFACE \"lo0\"\n#endif\n\nTEST(IbvSockaddr, Inet6) {\n  {\n    auto sa = ibv::Sockaddr::createInetSockAddr(\"[::1]:5\");\n    ASSERT_EQ(family(sa), AF_INET6);\n    ASSERT_EQ(port(sa), ntohs(5));\n    ASSERT_EQ(sa.str(), \"[::1]:5\");\n  }\n\n  {\n    auto sa = ibv::Sockaddr::createInetSockAddr(\"[::1]:0\");\n    ASSERT_EQ(family(sa), AF_INET6);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"[::1]:0\");\n  }\n\n  {\n    auto sa = ibv::Sockaddr::createInetSockAddr(\"::1\");\n    ASSERT_EQ(family(sa), AF_INET6);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"[::1]:0\");\n  }\n\n#ifdef LOOPBACK_INTERFACE\n  {\n    auto sa = ibv::Sockaddr::createInetSockAddr(\"::1%\" LOOPBACK_INTERFACE);\n    ASSERT_EQ(family(sa), AF_INET6);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"[::1%\" LOOPBACK_INTERFACE \"]:0\");\n  }\n\n  {\n    sockaddr_in6 sa;\n    std::memset(&sa, 0, sizeof(sa));\n    sa.sin6_family = AF_INET6;\n    sa.sin6_port = ntohs(42);\n    sa.sin6_flowinfo = 0;\n    sa.sin6_addr.s6_addr[15] = 1;\n    // Implicitly assuming that the loopback interface is the first one.\n    sa.sin6_scope_id = 1;\n    ibv::Sockaddr tpSa(reinterpret_cast<sockaddr*>(&sa), sizeof(sa));\n    ASSERT_EQ(tpSa.str(), \"[::1%\" LOOPBACK_INTERFACE \"]:42\");\n  }\n#endif\n}\n"
  },
  {
    "path": "tensorpipe/test/transport/listener_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/transport_test.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::transport;\n\nTEST_P(TransportTest, Listener_Basics) {\n  auto context = GetParam()->getContext();\n  auto addr = GetParam()->defaultAddr();\n\n  {\n    std::mutex mutex;\n    std::condition_variable cv;\n    std::vector<std::shared_ptr<Connection>> connections;\n\n    // Listener runs callback for every new connection.\n    auto listener = context->listen(addr);\n    listener->accept(\n        [&](const Error& error, std::shared_ptr<Connection> connection) {\n          ASSERT_FALSE(error) << error.what();\n          std::lock_guard<std::mutex> lock(mutex);\n          connections.push_back(std::move(connection));\n          cv.notify_one();\n        });\n\n    // Connect to listener.\n    auto connection = context->connect(listener->addr());\n\n    // Wait for new connection\n    {\n      std::unique_lock<std::mutex> lock(mutex);\n      while (connections.empty()) {\n        cv.wait(lock);\n      }\n    }\n  }\n\n  context->join();\n}\n\nTEST_P(TransportTest, Listener_AcceptCallbacksAreQueued) {\n  auto context = GetParam()->getContext();\n  auto addr = GetParam()->defaultAddr();\n\n  {\n    auto listener = context->listen(addr);\n    int numAccepts = 0;\n    std::promise<void> donePromise;\n    for (int i = 0; i < 10; ++i) {\n      listener->accept(\n          [&, i](const Error& error, std::shared_ptr<Connection> /*unused*/) {\n            if (error) {\n              donePromise.set_exception(\n                  std::make_exception_ptr(std::runtime_error(error.what())));\n            } else {\n              EXPECT_EQ(i, numAccepts);\n              numAccepts++;\n              if 
(numAccepts == 10) {\n                donePromise.set_value();\n              }\n            }\n          });\n    }\n\n    // Avoid connections to be destroyed before being established.\n    std::vector<std::shared_ptr<Connection>> conns;\n    for (int i = 0; i < 10; ++i) {\n      auto c = context->connect(listener->addr());\n      conns.push_back(std::move(c));\n    }\n    donePromise.get_future().get();\n  }\n\n  context->join();\n}\n\nTEST_P(TransportTest, Listener_IncomingConnectionsAreQueued) {\n  auto context = GetParam()->getContext();\n  auto addr = GetParam()->defaultAddr();\n\n  {\n    auto listener = context->listen(addr);\n    int numAccepts = 0;\n    std::promise<void> donePromise;\n    // Avoid connections to be destroyed before being established.\n    std::vector<std::shared_ptr<Connection>> conns;\n    for (int i = 0; i < 10; ++i) {\n      auto c = context->connect(listener->addr());\n      conns.push_back(std::move(c));\n    }\n    for (int i = 0; i < 10; ++i) {\n      listener->accept(\n          [&, i](const Error& error, std::shared_ptr<Connection> /*unused*/) {\n            if (error) {\n              donePromise.set_exception(\n                  std::make_exception_ptr(std::runtime_error(error.what())));\n            } else {\n              EXPECT_EQ(i, numAccepts);\n              numAccepts++;\n              if (numAccepts == 10) {\n                donePromise.set_value();\n              }\n            }\n          });\n    }\n\n    donePromise.get_future().get();\n  }\n\n  context->join();\n}\n\nTEST_P(TransportTest, Listener_CreateThenCloseAndThenGetAddress) {\n  auto context = GetParam()->getContext();\n\n  auto listener = context->listen(GetParam()->defaultAddr());\n  listener->close();\n  auto addr = listener->addr();\n\n  std::promise<void> acceptPromise;\n  listener->accept(\n      [&](const Error& error, std::shared_ptr<Connection> /*unused*/) {\n        if (error) {\n          acceptPromise.set_exception(\n              
std::make_exception_ptr(std::runtime_error(error.what())));\n        } else {\n          acceptPromise.set_value();\n        }\n      });\n\n  auto connection = context->connect(addr);\n  std::promise<void> writePromise;\n  connection->write(nullptr, 0, [&](const Error& error) {\n    if (error) {\n      writePromise.set_exception(\n          std::make_exception_ptr(std::runtime_error(error.what())));\n    } else {\n      writePromise.set_value();\n    }\n  });\n\n  try {\n    acceptPromise.get_future().get();\n  } catch (const std::runtime_error&) {\n    // Expected\n  }\n\n  try {\n    writePromise.get_future().get();\n  } catch (const std::runtime_error&) {\n    // Expected\n  }\n\n  context->join();\n}\n\nTEST_P(TransportTest, Listener_CreateAfterClosingContextAndThenGetAddress) {\n  auto context = GetParam()->getContext();\n\n  // This means the listener will be created in an already-closed state.\n  context->close();\n  auto listener = context->listen(GetParam()->defaultAddr());\n  auto addr = listener->addr();\n\n  std::promise<void> acceptPromise;\n  listener->accept(\n      [&](const Error& error, std::shared_ptr<Connection> /*unused*/) {\n        if (error) {\n          acceptPromise.set_exception(\n              std::make_exception_ptr(std::runtime_error(error.what())));\n        } else {\n          acceptPromise.set_value();\n        }\n      });\n\n  auto connection = context->connect(addr);\n  std::promise<void> writePromise;\n  connection->write(nullptr, 0, [&](const Error& error) {\n    if (error) {\n      writePromise.set_exception(\n          std::make_exception_ptr(std::runtime_error(error.what())));\n    } else {\n      writePromise.set_value();\n    }\n  });\n\n  try {\n    acceptPromise.get_future().get();\n  } catch (const std::runtime_error&) {\n    // Expected\n  }\n\n  try {\n    writePromise.get_future().get();\n  } catch (const std::runtime_error&) {\n    // Expected\n  }\n\n  context->join();\n}\n"
  },
  {
    "path": "tensorpipe/test/transport/shm/connection_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/shm/shm_test.h>\n\n#include <gtest/gtest.h>\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::transport;\n\nnamespace {\n\nclass ShmTransportTest : public TransportTest {};\n\nSHMTransportTestHelper helper;\n\n// This value is defined in tensorpipe/transport/shm/connection.h\nstatic constexpr auto kBufferSize = 2 * 1024 * 1024;\n\n} // namespace\n\nTEST_P(ShmTransportTest, Chunking) {\n  // This is larger than the default ring buffer size.\n  const int kMsgSize = 5 * kBufferSize;\n  std::string srcBuf(kMsgSize, 0x42);\n  auto dstBuf = std::make_unique<char[]>(kMsgSize);\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        doRead(\n            conn,\n            dstBuf.get(),\n            kMsgSize,\n            [&, conn](const Error& error, const void* ptr, size_t len) {\n              ASSERT_FALSE(error) << error.what();\n              ASSERT_EQ(len, kMsgSize);\n              ASSERT_EQ(ptr, dstBuf.get());\n              for (int i = 0; i < kMsgSize; ++i) {\n                ASSERT_EQ(dstBuf[i], srcBuf[i]);\n              }\n              peers_->done(PeerGroup::kServer);\n            });\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        doWrite(\n            conn,\n            srcBuf.c_str(),\n            srcBuf.length(),\n            [&, conn](const Error& error) {\n              ASSERT_FALSE(error) << error.what();\n              peers_->done(PeerGroup::kClient);\n            });\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nTEST_P(ShmTransportTest, ChunkingImplicitRead) {\n  // This is larger than the default ring buffer 
size.\n  const size_t kMsgSize = 5 * kBufferSize;\n  std::string msg(kMsgSize, 0x42);\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        doRead(\n            conn, [&, conn](const Error& error, const void* ptr, size_t len) {\n              ASSERT_FALSE(error) << error.what();\n              ASSERT_EQ(len, kMsgSize);\n              for (int i = 0; i < kMsgSize; ++i) {\n                ASSERT_EQ(static_cast<const uint8_t*>(ptr)[i], msg[i]);\n              }\n              peers_->done(PeerGroup::kServer);\n            });\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        doWrite(conn, msg.c_str(), msg.length(), [&, conn](const Error& error) {\n          ASSERT_FALSE(error) << error.what();\n          peers_->done(PeerGroup::kClient);\n        });\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nTEST_P(ShmTransportTest, QueueWrites) {\n  // This is large enough that two of those will not fit in the ring buffer at\n  // the same time.\n  constexpr int numMsg = 2;\n  constexpr size_t numBytes = (3 * kBufferSize) / 4;\n  const std::string kReady = \"ready\";\n  std::array<char, numBytes> garbage;\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        // Wait for peer to queue up writes before attempting to read\n        EXPECT_EQ(kReady, peers_->recv(PeerGroup::kServer));\n\n        for (int i = 0; i < numMsg; ++i) {\n          doRead(\n              conn,\n              [&, conn, i](const Error& error, const void* ptr, size_t len) {\n                ASSERT_FALSE(error) << error.what();\n                ASSERT_EQ(len, numBytes);\n                if (i == numMsg - 1) {\n                  peers_->done(PeerGroup::kServer);\n                }\n              });\n        }\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        for (int i = 0; i < numMsg; ++i) {\n          doWrite(\n              conn,\n     
         garbage.data(),\n              garbage.size(),\n              [&, conn, i](const Error& error) {\n                ASSERT_FALSE(error) << error.what();\n                if (i == numMsg - 1) {\n                  peers_->done(PeerGroup::kClient);\n                }\n              });\n        }\n        peers_->send(PeerGroup::kServer, kReady);\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nnamespace {\n\nstruct MyNopType {\n  std::string myStringField;\n  NOP_STRUCTURE(MyNopType, myStringField);\n};\n\n} // namespace\n\nTEST_P(ShmTransportTest, NopWriteWrapAround) {\n  constexpr int numMsg = 2;\n  constexpr size_t kSize = (3 * kBufferSize) / 4;\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        for (int i = 0; i < numMsg; ++i) {\n          auto holder = std::make_shared<NopHolder<MyNopType>>();\n          conn->read(*holder, [&, conn, holder, i](const Error& error) {\n            ASSERT_FALSE(error) << error.what();\n            ASSERT_EQ(holder->getObject().myStringField.length(), kSize);\n            if (i == numMsg - 1) {\n              peers_->done(PeerGroup::kServer);\n            }\n          });\n        }\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        for (int i = 0; i < numMsg; ++i) {\n          auto holder = std::make_shared<NopHolder<MyNopType>>();\n          holder->getObject().myStringField = std::string(kSize, 'B');\n          conn->write(*holder, [&, conn, holder, i](const Error& error) {\n            ASSERT_FALSE(error) << error.what();\n            if (i == numMsg - 1) {\n              peers_->done(PeerGroup::kClient);\n            }\n          });\n        }\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nINSTANTIATE_TEST_CASE_P(Shm, ShmTransportTest, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/shm/listener_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/shm/shm_test.h>\n\n#include <chrono>\n#include <future>\n\n#include <gmock/gmock.h>\n#include <gtest/gtest.h>\n#include <nop/serializer.h>\n#include <nop/structure.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::transport;\n\nnamespace {\n\nclass ShmListenerTest : public TransportTest {};\n\nSHMTransportTestHelper helper;\n\nstd::string generateUniqueAddr() {\n  const ::testing::TestInfo* const testInfo =\n      ::testing::UnitTest::GetInstance()->current_test_info();\n  std::ostringstream ss;\n  ss << \"tensorpipe_test_\" << testInfo->test_suite_name() << \".\"\n     << testInfo->name() << \"_\" << ::getpid();\n  return ss.str();\n}\n\n} // namespace\n\nTEST_P(ShmListenerTest, ExplicitAbstractSocketName) {\n  std::string expectedAddr = generateUniqueAddr();\n  std::shared_ptr<Context> ctx = GetParam()->getContext();\n  std::shared_ptr<Listener> listener = ctx->listen(expectedAddr);\n  std::string actualAddr = listener->addr();\n  ASSERT_EQ(actualAddr, expectedAddr);\n  std::shared_ptr<Connection> outgoingConnection = ctx->connect(actualAddr);\n  std::promise<void> prom;\n  listener->accept(\n      [&](const Error& error, std::shared_ptr<Connection> /* unused */) {\n        EXPECT_FALSE(error) << error.what();\n        prom.set_value();\n      });\n  std::future_status res = prom.get_future().wait_for(std::chrono::seconds(1));\n  ASSERT_NE(res, std::future_status::timeout);\n}\n\nTEST_P(ShmListenerTest, AutobindAbstractSocketName) {\n  std::shared_ptr<Context> ctx = GetParam()->getContext();\n  std::shared_ptr<Listener> listener = ctx->listen(\"\");\n  std::string addr = listener->addr();\n  ASSERT_NE(addr, \"\");\n  // Since Linux 2.3.15 (Aug 1999) the address is in this 
format, see unix(7).\n  ASSERT_THAT(addr, ::testing::MatchesRegex(\"[0-9a-f]{5}\"));\n  std::shared_ptr<Connection> outgoingConnection = ctx->connect(addr);\n  std::promise<void> prom;\n  listener->accept(\n      [&](const Error& error, std::shared_ptr<Connection> /* unused */) {\n        EXPECT_FALSE(error) << error.what();\n        prom.set_value();\n      });\n  std::future_status res = prom.get_future().wait_for(std::chrono::seconds(1));\n  ASSERT_NE(res, std::future_status::timeout);\n}\n\nINSTANTIATE_TEST_CASE_P(Shm, ShmListenerTest, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/shm/reactor_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <sys/types.h>\n#include <unistd.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/queue.h>\n#include <tensorpipe/common/socket.h>\n#include <tensorpipe/transport/shm/reactor.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::transport::shm;\n\nnamespace {\n\nvoid run(std::function<void(int)> fn1, std::function<void(int)> fn2) {\n  int fds[2];\n\n  {\n    auto rv = socketpair(AF_UNIX, SOCK_STREAM, 0, fds);\n    if (rv != 0) {\n      TP_THROW_SYSTEM(errno) << \"Failed to create socket pair\";\n    }\n  }\n\n  {\n    auto pid = fork();\n    TP_DCHECK_GE(pid, 0);\n    if (pid == 0) {\n      close(fds[0]);\n      fn2(fds[1]);\n      close(fds[1]);\n      exit(0);\n    }\n  }\n\n  close(fds[1]);\n  fn1(fds[0]);\n  close(fds[0]);\n  wait(nullptr);\n}\n\n} // namespace\n\nTEST(ShmReactor, Basic) {\n  run(\n      [](int fd) {\n        tensorpipe::Queue<int> queue;\n        auto reactor = std::make_shared<Reactor>();\n        auto token1 = reactor->add([&] { queue.push(1); });\n        auto token2 = reactor->add([&] { queue.push(2); });\n\n        // Share reactor fds and token with other process.\n        {\n          auto socket = Socket(fd);\n          auto fds = reactor->fds();\n          auto error = socket.sendPayloadAndFds(\n              token1, token2, std::get<0>(fds), std::get<1>(fds));\n          ASSERT_FALSE(error) << error.what();\n        }\n\n        // Wait for other process to run trigger.\n        ASSERT_EQ(queue.pop(), 1);\n        ASSERT_EQ(queue.pop(), 2);\n\n        reactor->remove(token1);\n        reactor->remove(token2);\n      },\n      [](int fd) {\n        Reactor::TToken token1;\n        Reactor::TToken token2;\n        Fd header;\n     
   Fd data;\n\n        // Wait for other process to share reactor fds and token.\n        {\n          auto socket = Socket(fd);\n          auto error = socket.recvPayloadAndFds(token1, token2, header, data);\n          ASSERT_FALSE(error) << error.what();\n        }\n\n        // Create and run trigger. This should wake up the other\n        // process and run the registered function.\n        Reactor::Trigger trigger(std::move(header), std::move(data));\n        trigger.run(token1);\n        trigger.run(token2);\n      });\n}\n\nTEST(ShmReactor, TokenReuse) {\n  tensorpipe::Queue<int> queue(3);\n  auto reactor = std::make_shared<Reactor>();\n  auto t1 = reactor->add([&] { queue.push(1); });\n  auto t2 = reactor->add([&] { queue.push(2); });\n  auto t3 = reactor->add([&] { queue.push(3); });\n\n  // Check that they're monotonically increasing.\n  ASSERT_GT(t2, t1);\n  ASSERT_GT(t3, t2);\n\n  // Remove token and check that it is reused.\n  reactor->remove(t1);\n  auto t4 = reactor->add([&] { queue.push(4); });\n  ASSERT_EQ(t4, t1);\n\n  // Remove multiple tokens and check that they're reused in order.\n  reactor->remove(t2);\n  reactor->remove(t3);\n  auto t5 = reactor->add([&] { queue.push(5); });\n  auto t6 = reactor->add([&] { queue.push(6); });\n  ASSERT_EQ(t5, t2);\n  ASSERT_EQ(t6, t3);\n\n  reactor->remove(t4);\n  reactor->remove(t5);\n  reactor->remove(t6);\n}\n"
  },
  {
    "path": "tensorpipe/test/transport/shm/shm_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/shm/shm_test.h>\n\nnamespace {\n\nSHMTransportTestHelper helper;\n\n} // namespace\n\nINSTANTIATE_TEST_CASE_P(Shm, TransportTest, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/shm/shm_test.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sstream>\n\n#include <tensorpipe/test/transport/transport_test.h>\n#include <tensorpipe/transport/shm/factory.h>\n\nclass SHMTransportTestHelper : public TransportTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::transport::Context> getContextInternal()\n      override {\n    return tensorpipe::transport::shm::create();\n  }\n\n public:\n  std::string defaultAddr() override {\n    return \"\";\n  }\n};\n"
  },
  {
    "path": "tensorpipe/test/transport/shm/sockaddr_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/shm/sockaddr.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe::transport;\n\nTEST(ShmSockaddr, FromToString) {\n  auto addr = shm::Sockaddr::createAbstractUnixAddr(\"foo\");\n  ASSERT_EQ(addr.str(), std::string(\"foo\"));\n}\n"
  },
  {
    "path": "tensorpipe/test/transport/transport_test.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <future>\n#include <memory>\n\n#include <gtest/gtest.h>\n\n#include <tensorpipe/test/peer_group.h>\n#include <tensorpipe/transport/connection.h>\n#include <tensorpipe/transport/context.h>\n#include <tensorpipe/transport/listener.h>\n\nclass TransportTestHelper {\n public:\n  std::shared_ptr<tensorpipe::transport::Context> getContext(\n      bool skipViabilityCheck = false) {\n    std::shared_ptr<tensorpipe::transport::Context> ctx = getContextInternal();\n    if (!skipViabilityCheck) {\n      EXPECT_TRUE(ctx->isViable());\n    }\n    return ctx;\n  }\n\n  virtual std::string defaultAddr() = 0;\n\n  virtual std::unique_ptr<PeerGroup> makePeerGroup() {\n    return std::make_unique<ThreadPeerGroup>();\n  }\n\n  virtual ~TransportTestHelper() = default;\n\n protected:\n  virtual std::shared_ptr<tensorpipe::transport::Context>\n  getContextInternal() = 0;\n};\n\nclass TransportTest : public ::testing::TestWithParam<TransportTestHelper*> {\n protected:\n  std::unique_ptr<PeerGroup> peers_;\n\n public:\n  TransportTest() : peers_(GetParam()->makePeerGroup()) {}\n\n  void testConnection(\n      std::function<void(std::shared_ptr<tensorpipe::transport::Connection>)>\n          listeningFn,\n      std::function<void(std::shared_ptr<tensorpipe::transport::Connection>)>\n          connectingFn) {\n    using namespace tensorpipe::transport;\n\n    peers_->spawn(\n        [&] {\n          auto ctx = GetParam()->getContext();\n          ctx->setId(\"server\");\n          auto addr = GetParam()->defaultAddr();\n          auto listener = ctx->listen(addr);\n          std::promise<std::shared_ptr<Connection>> connectionProm;\n          listener->accept([&](const tensorpipe::Error& error,\n                              
 std::shared_ptr<Connection> conn) {\n            ASSERT_FALSE(error) << error.what();\n            connectionProm.set_value(std::move(conn));\n          });\n\n          peers_->send(PeerGroup::kClient, listener->addr());\n\n          listeningFn(connectionProm.get_future().get());\n\n          ctx->join();\n        },\n        [&] {\n          auto ctx = GetParam()->getContext();\n          ctx->setId(\"client\");\n          auto listenerAddr = peers_->recv(PeerGroup::kClient);\n\n          connectingFn(ctx->connect(listenerAddr));\n\n          ctx->join();\n        });\n  }\n\n  // Add to a closure to check the callback is called before being destroyed\n  class Bomb {\n   public:\n    Bomb() = default;\n\n    Bomb(const Bomb&) = delete;\n    Bomb(Bomb&& b) {\n      defused_ = b.defused_;\n      b.defused_ = false;\n    }\n\n    Bomb& operator=(const Bomb&) = delete;\n    Bomb& operator=(Bomb&&) = delete;\n\n    void defuse() {\n      defused_ = true;\n    }\n\n    ~Bomb() {\n      EXPECT_TRUE(defused_);\n    }\n\n   private:\n    bool defused_ = false;\n  };\n\n  std::shared_ptr<Bomb> armBomb() {\n    return std::make_shared<Bomb>();\n  }\n\n  void doRead(\n      std::shared_ptr<tensorpipe::transport::Connection> conn,\n      tensorpipe::transport::Connection::read_callback_fn fn) {\n    auto mutex = std::make_shared<std::mutex>();\n    std::lock_guard<std::mutex> outerLock(*mutex);\n    // We acquire the same mutex while calling read and inside its callback so\n    // that we deadlock if the callback is invoked inline.\n    conn->read(\n        [fn{std::move(fn)}, mutex, bomb{armBomb()}](\n            const tensorpipe::Error& error, const void* ptr, size_t len) {\n          std::lock_guard<std::mutex> innerLock(*mutex);\n          bomb->defuse();\n          fn(error, ptr, len);\n        });\n  }\n\n  void doRead(\n      std::shared_ptr<tensorpipe::transport::Connection> conn,\n      void* ptr,\n      size_t length,\n      
tensorpipe::transport::Connection::read_callback_fn fn) {\n    auto mutex = std::make_shared<std::mutex>();\n    std::lock_guard<std::mutex> outerLock(*mutex);\n    // We acquire the same mutex while calling read and inside its callback so\n    // that we deadlock if the callback is invoked inline.\n    conn->read(\n        ptr,\n        length,\n        [fn{std::move(fn)}, mutex, bomb{armBomb()}](\n            const tensorpipe::Error& error, const void* ptr, size_t len) {\n          std::lock_guard<std::mutex> innerLock(*mutex);\n          bomb->defuse();\n          fn(error, ptr, len);\n        });\n  }\n\n  void doWrite(\n      std::shared_ptr<tensorpipe::transport::Connection> conn,\n      const void* ptr,\n      size_t length,\n      tensorpipe::transport::Connection::write_callback_fn fn) {\n    auto mutex = std::make_shared<std::mutex>();\n    // We acquire the same mutex while calling write and inside its callback\n    // so that we deadlock if the callback is invoked inline.\n    std::lock_guard<std::mutex> outerLock(*mutex);\n    conn->write(\n        ptr,\n        length,\n        [fn{std::move(fn)}, mutex, bomb{armBomb()}](\n            const tensorpipe::Error& error) {\n          std::lock_guard<std::mutex> innerLock(*mutex);\n          bomb->defuse();\n          fn(error);\n        });\n  }\n};\n"
  },
  {
    "path": "tensorpipe/test/transport/uv/connection_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/uv/uv_test.h>\n\n#include <gtest/gtest.h>\n\nnamespace {\n\nclass UVTransportConnectionTest : public TransportTest {};\n\nUVTransportTestHelper helper;\n\n} // namespace\n\nusing namespace tensorpipe;\nusing namespace tensorpipe::transport;\n\nTEST_P(UVTransportConnectionTest, LargeWrite) {\n  constexpr int kMsgSize = 16 * 1024 * 1024;\n  std::string msg(kMsgSize, 0x42);\n\n  testConnection(\n      [&](std::shared_ptr<Connection> conn) {\n        doWrite(conn, msg.c_str(), msg.length(), [&, conn](const Error& error) {\n          ASSERT_FALSE(error) << error.what();\n          peers_->done(PeerGroup::kServer);\n        });\n        peers_->join(PeerGroup::kServer);\n      },\n      [&](std::shared_ptr<Connection> conn) {\n        doRead(\n            conn, [&, conn](const Error& error, const void* data, size_t len) {\n              ASSERT_FALSE(error) << error.what();\n              ASSERT_EQ(len, msg.length());\n              const char* cdata = (const char*)data;\n              for (int i = 0; i < len; ++i) {\n                const char c = cdata[i];\n                ASSERT_EQ(c, msg[i]) << \"Wrong value at position \" << i\n                                     << \" of \" << msg.length();\n              }\n              peers_->done(PeerGroup::kClient);\n            });\n        peers_->join(PeerGroup::kClient);\n      });\n}\n\nINSTANTIATE_TEST_CASE_P(\n    Uv,\n    UVTransportConnectionTest,\n    ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/uv/context_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/uv/uv_test.h>\n#include <tensorpipe/transport/uv/utility.h>\n\n#include <gtest/gtest.h>\n\nnamespace {\n\nclass UVTransportContextTest : public TransportTest {};\n\nUVTransportTestHelper helper;\n\n} // namespace\n\nusing namespace tensorpipe;\n\n// Linux-only because OSX machines on CircleCI cannot resolve their hostname\n#ifdef __linux__\nTEST_P(UVTransportContextTest, LookupHostnameAddress) {\n  Error error;\n  std::string addr;\n  std::tie(error, addr) = transport::uv::lookupAddrForHostname();\n  EXPECT_FALSE(error) << error.what();\n  EXPECT_NE(addr, \"\");\n}\n#endif\n\n// Interface name conventions change based on platform. Linux uses \"lo\", OSX\n// uses lo0, Windows uses integers.\n#ifdef __linux__\n#define LOOPBACK_INTERFACE \"lo\"\n#elif __APPLE__\n#define LOOPBACK_INTERFACE \"lo0\"\n#endif\n\n#ifdef LOOPBACK_INTERFACE\nTEST_P(UVTransportContextTest, LookupInterfaceAddress) {\n  Error error;\n  std::string addr;\n  std::tie(error, addr) = transport::uv::lookupAddrForIface(LOOPBACK_INTERFACE);\n  EXPECT_FALSE(error) << error.what();\n  EXPECT_NE(addr, \"\");\n}\n#endif\n\nTEST_P(UVTransportContextTest, LookupAddressLikeNccl) {\n  Error error;\n  std::string addr;\n  std::tie(error, addr) = transport::uv::lookupAddrLikeNccl();\n  EXPECT_FALSE(error) << error.what();\n  EXPECT_NE(addr, \"\");\n}\n\nINSTANTIATE_TEST_CASE_P(Uv, UVTransportContextTest, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/uv/loop_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <gtest/gtest.h>\n\n#include <tensorpipe/transport/uv/loop.h>\n\nusing namespace tensorpipe::transport::uv;\n\nnamespace test {\nnamespace transport {\nnamespace uv {\n\nTEST(UvLoop, Defer) {\n  Loop loop;\n\n  {\n    // Defer function on event loop thread.\n    std::promise<std::thread::id> prom;\n    loop.deferToLoop([&] { prom.set_value(std::this_thread::get_id()); });\n    ASSERT_NE(std::this_thread::get_id(), prom.get_future().get());\n  }\n\n  loop.join();\n}\n\n} // namespace uv\n} // namespace transport\n} // namespace test\n"
  },
  {
    "path": "tensorpipe/test/transport/uv/sockaddr_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/sockaddr.h>\n\n#include <netinet/in.h>\n\n#include <gtest/gtest.h>\n\nusing namespace tensorpipe::transport;\n\nnamespace {\n\nint family(const uv::Sockaddr& addr) {\n  auto sockaddr = addr.addr();\n  return sockaddr->sa_family;\n}\n\nint port(const uv::Sockaddr& addr) {\n  auto sockaddr = addr.addr();\n  if (sockaddr->sa_family == AF_INET) {\n    auto in = reinterpret_cast<const struct sockaddr_in*>(sockaddr);\n    return in->sin_port;\n  }\n  if (sockaddr->sa_family == AF_INET6) {\n    auto in6 = reinterpret_cast<const struct sockaddr_in6*>(sockaddr);\n    return in6->sin6_port;\n  }\n  return -1;\n}\n\n} // namespace\n\nTEST(UvSockaddr, InetBadPort) {\n  ASSERT_THROW(\n      uv::Sockaddr::createInetSockAddr(\"1.2.3.4:-1\"), std::invalid_argument);\n  ASSERT_THROW(\n      uv::Sockaddr::createInetSockAddr(\"1.2.3.4:65536\"), std::invalid_argument);\n}\n\nTEST(UvSockaddr, Inet) {\n  {\n    auto sa = uv::Sockaddr::createInetSockAddr(\"1.2.3.4:5\");\n    ASSERT_EQ(family(sa), AF_INET);\n    ASSERT_EQ(port(sa), ntohs(5));\n    ASSERT_EQ(sa.str(), \"1.2.3.4:5\");\n  }\n\n  {\n    auto sa = uv::Sockaddr::createInetSockAddr(\"1.2.3.4:0\");\n    ASSERT_EQ(family(sa), AF_INET);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"1.2.3.4:0\");\n  }\n\n  {\n    auto sa = uv::Sockaddr::createInetSockAddr(\"1.2.3.4\");\n    ASSERT_EQ(family(sa), AF_INET);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"1.2.3.4:0\");\n  }\n}\n\nTEST(UvSockaddr, Inet6BadPort) {\n  ASSERT_THROW(\n      uv::Sockaddr::createInetSockAddr(\"[::1]:-1\"), std::invalid_argument);\n  ASSERT_THROW(\n      uv::Sockaddr::createInetSockAddr(\"[::1]:65536\"), std::invalid_argument);\n  ASSERT_THROW(\n      
uv::Sockaddr::createInetSockAddr(\"]::1[\"), std::invalid_argument);\n}\n\n// Interface name conventions change based on platform. Linux uses \"lo\", OSX\n// uses lo0, Windows uses integers.\n#ifdef __linux__\n#define LOOPBACK_INTERFACE \"lo\"\n#elif __APPLE__\n#define LOOPBACK_INTERFACE \"lo0\"\n#endif\n\nTEST(UvSockaddr, Inet6) {\n  {\n    auto sa = uv::Sockaddr::createInetSockAddr(\"[::1]:5\");\n    ASSERT_EQ(family(sa), AF_INET6);\n    ASSERT_EQ(port(sa), ntohs(5));\n    ASSERT_EQ(sa.str(), \"[::1]:5\");\n  }\n\n  {\n    auto sa = uv::Sockaddr::createInetSockAddr(\"[::1]:0\");\n    ASSERT_EQ(family(sa), AF_INET6);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"[::1]:0\");\n  }\n\n  {\n    auto sa = uv::Sockaddr::createInetSockAddr(\"::1\");\n    ASSERT_EQ(family(sa), AF_INET6);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"[::1]:0\");\n  }\n\n#ifdef LOOPBACK_INTERFACE\n  {\n    auto sa = uv::Sockaddr::createInetSockAddr(\"::1%\" LOOPBACK_INTERFACE);\n    ASSERT_EQ(family(sa), AF_INET6);\n    ASSERT_EQ(port(sa), 0);\n    ASSERT_EQ(sa.str(), \"[::1%\" LOOPBACK_INTERFACE \"]:0\");\n  }\n\n  {\n    sockaddr_in6 sa;\n    std::memset(&sa, 0, sizeof(sa));\n    sa.sin6_family = AF_INET6;\n    sa.sin6_port = ntohs(42);\n    sa.sin6_flowinfo = 0;\n    sa.sin6_addr.s6_addr[15] = 1;\n    // Implicitly assuming that the loopback interface is the first one.\n    sa.sin6_scope_id = 1;\n    uv::Sockaddr tpSa(reinterpret_cast<sockaddr*>(&sa), sizeof(sa));\n    ASSERT_EQ(tpSa.str(), \"[::1%\" LOOPBACK_INTERFACE \"]:42\");\n  }\n#endif\n}\n"
  },
  {
    "path": "tensorpipe/test/transport/uv/uv_test.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/test/transport/uv/uv_test.h>\n\nnamespace {\n\nUVTransportTestHelper helper;\n\n} // namespace\n\nINSTANTIATE_TEST_CASE_P(Uv, TransportTest, ::testing::Values(&helper));\n"
  },
  {
    "path": "tensorpipe/test/transport/uv/uv_test.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <tensorpipe/test/transport/transport_test.h>\n#include <tensorpipe/transport/uv/factory.h>\n\nclass UVTransportTestHelper : public TransportTestHelper {\n protected:\n  std::shared_ptr<tensorpipe::transport::Context> getContextInternal()\n      override {\n    return tensorpipe::transport::uv::create();\n  }\n\n public:\n  std::string defaultAddr() override {\n    return \"127.0.0.1\";\n  }\n};\n"
  },
  {
    "path": "tensorpipe/transport/connection.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <string>\n\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/nop.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\nclass Connection {\n public:\n  using read_callback_fn =\n      std::function<void(const Error& error, const void* ptr, size_t length)>;\n\n  virtual void read(read_callback_fn fn) = 0;\n\n  virtual void read(void* ptr, size_t length, read_callback_fn fn) = 0;\n\n  using write_callback_fn = std::function<void(const Error& error)>;\n\n  virtual void write(const void* ptr, size_t length, write_callback_fn fn) = 0;\n\n  //\n  // Helper functions for reading/writing nop objects.\n  //\n\n  // Read and parse a nop object.\n  //\n  // This function may be overridden by a subclass.\n  //\n  // For example, the shm transport may be able to bypass reading into a\n  // temporary buffer and instead instead read directly from its peer's\n  // ring buffer. This saves an allocation and a memory copy.\n  //\n  using read_nop_callback_fn = std::function<void(const Error& error)>;\n\n  virtual void read(AbstractNopHolder& object, read_nop_callback_fn fn) = 0;\n\n  // Serialize and write nop object.\n  //\n  // This function may be overridden by a subclass.\n  //\n  // For example, the shm transport may be able to bypass serialization\n  // into a temporary buffer and instead instead serialize directly into\n  // its peer's ring buffer. This saves an allocation and a memory copy.\n  //\n  virtual void write(const AbstractNopHolder& object, write_callback_fn fn) = 0;\n\n  // Tell the connection what its identifier is.\n  //\n  // This is only supposed to be called from the high-level pipe or from\n  // channels. 
It will only used for logging and debugging purposes.\n  virtual void setId(std::string id) = 0;\n\n  virtual void close() = 0;\n\n  virtual ~Connection() = default;\n};\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/connection_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n#include <memory>\n#include <string>\n#include <type_traits>\n#include <utility>\n\n#include <tensorpipe/transport/connection.h>\n#include <tensorpipe/transport/connection_impl_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\ntemplate <typename TCtx, typename TList, typename TConn>\nclass ConnectionBoilerplate : public Connection {\n public:\n  template <typename... Args>\n  ConnectionBoilerplate(\n      typename ConnectionImplBoilerplate<TCtx, TList, TConn>::ConstructorToken\n          token,\n      std::shared_ptr<TCtx> context,\n      std::string id,\n      Args... args);\n\n  explicit ConnectionBoilerplate(std::shared_ptr<TConn> connection);\n\n  ConnectionBoilerplate(const ConnectionBoilerplate&) = delete;\n  ConnectionBoilerplate(ConnectionBoilerplate&&) = delete;\n  ConnectionBoilerplate& operator=(const ConnectionBoilerplate&) = delete;\n  ConnectionBoilerplate& operator=(ConnectionBoilerplate&&) = delete;\n\n  // Queue a read operation.\n  void read(read_callback_fn fn) override;\n  void read(AbstractNopHolder& object, read_nop_callback_fn fn) override;\n  void read(void* ptr, size_t length, read_callback_fn fn) override;\n\n  // Perform a write operation.\n  void write(const void* ptr, size_t length, write_callback_fn fn) override;\n  void write(const AbstractNopHolder& object, write_callback_fn fn) override;\n\n  // Tell the connection what its identifier is.\n  void setId(std::string id) override;\n\n  // Shut down the connection and its resources.\n  void close() override;\n\n  ~ConnectionBoilerplate() override;\n\n protected:\n  // Using a shared_ptr allows us to detach the lifetime of the implementation\n  // from the public object's one and perform the 
destruction asynchronously.\n  const std::shared_ptr<TConn> impl_;\n};\n\ntemplate <typename TCtx, typename TList, typename TConn>\ntemplate <typename... Args>\nConnectionBoilerplate<TCtx, TList, TConn>::ConnectionBoilerplate(\n    typename ConnectionImplBoilerplate<TCtx, TList, TConn>::ConstructorToken\n        token,\n    std::shared_ptr<TCtx> context,\n    std::string id,\n    Args... args)\n    : impl_(std::make_shared<TConn>(\n          token,\n          std::move(context),\n          std::move(id),\n          std::forward<Args>(args)...)) {\n  static_assert(\n      std::is_base_of<ConnectionImplBoilerplate<TCtx, TList, TConn>, TConn>::\n          value,\n      \"\");\n  impl_->init();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nConnectionBoilerplate<TCtx, TList, TConn>::ConnectionBoilerplate(\n    std::shared_ptr<TConn> connection)\n    : impl_(std::move(connection)) {\n  static_assert(\n      std::is_base_of<ConnectionImplBoilerplate<TCtx, TList, TConn>, TConn>::\n          value,\n      \"\");\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionBoilerplate<TCtx, TList, TConn>::read(read_callback_fn fn) {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static Error error = TP_CREATE_ERROR(ContextNotViableError);\n    fn(error, nullptr, 0);\n    return;\n  }\n  impl_->read(std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionBoilerplate<TCtx, TList, TConn>::read(\n    AbstractNopHolder& object,\n    read_nop_callback_fn fn) {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static Error error = TP_CREATE_ERROR(ContextNotViableError);\n    fn(error);\n    return;\n  }\n  impl_->read(object, std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionBoilerplate<TCtx, TList, TConn>::read(\n    void* ptr,\n    
size_t length,\n    read_callback_fn fn) {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static Error error = TP_CREATE_ERROR(ContextNotViableError);\n    fn(error, ptr, length);\n    return;\n  }\n  impl_->read(ptr, length, std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionBoilerplate<TCtx, TList, TConn>::write(\n    const void* ptr,\n    size_t length,\n    write_callback_fn fn) {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static Error error = TP_CREATE_ERROR(ContextNotViableError);\n    fn(error);\n    return;\n  }\n  impl_->write(ptr, length, std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionBoilerplate<TCtx, TList, TConn>::write(\n    const AbstractNopHolder& object,\n    write_callback_fn fn) {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static Error error = TP_CREATE_ERROR(ContextNotViableError);\n    fn(error);\n    return;\n  }\n  impl_->write(object, std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionBoilerplate<TCtx, TList, TConn>::setId(std::string id) {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->setId(std::move(id));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionBoilerplate<TCtx, TList, TConn>::close() {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->close();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nConnectionBoilerplate<TCtx, TList, TConn>::~ConnectionBoilerplate() {\n  close();\n}\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/connection_impl_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/transport/connection.h>\n#include <tensorpipe/transport/error.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\ntemplate <typename TCtx, typename TList, typename TConn>\nclass ContextImplBoilerplate;\n\ntemplate <typename TCtx, typename TList, typename TConn>\nclass ListenerImplBoilerplate;\n\ntemplate <typename TCtx, typename TList, typename TConn>\nclass ConnectionImplBoilerplate : public std::enable_shared_from_this<TConn> {\n public:\n  class ConstructorToken {\n   public:\n    ConstructorToken(const ConstructorToken&) = default;\n\n   private:\n    explicit ConstructorToken() {}\n    friend ContextImplBoilerplate<TCtx, TList, TConn>;\n    friend ListenerImplBoilerplate<TCtx, TList, TConn>;\n  };\n\n  ConnectionImplBoilerplate(\n      ConstructorToken token,\n      std::shared_ptr<TCtx> context,\n      std::string id);\n\n  ConnectionImplBoilerplate(const ConnectionImplBoilerplate&) = delete;\n  ConnectionImplBoilerplate(ConnectionImplBoilerplate&&) = delete;\n  ConnectionImplBoilerplate& operator=(const ConnectionImplBoilerplate&) =\n      delete;\n  ConnectionImplBoilerplate& operator=(ConnectionImplBoilerplate&&) = delete;\n\n  // Initialize member fields that need `shared_from_this`.\n  void init();\n\n  // Queue a read operation.\n  using read_callback_fn = Connection::read_callback_fn;\n  using read_nop_callback_fn = Connection::read_nop_callback_fn;\n  void read(read_callback_fn fn);\n  void read(AbstractNopHolder& object, read_nop_callback_fn fn);\n  void 
read(void* ptr, size_t length, read_callback_fn fn);\n\n  // Perform a write operation.\n  using write_callback_fn = Connection::write_callback_fn;\n  void write(const void* ptr, size_t length, write_callback_fn fn);\n  void write(const AbstractNopHolder& object, write_callback_fn fn);\n\n  // Tell the connection what its identifier is.\n  void setId(std::string id);\n\n  // Shut down the connection and its resources.\n  void close();\n\n  virtual ~ConnectionImplBoilerplate() = default;\n\n protected:\n  virtual void initImplFromLoop() = 0;\n  virtual void readImplFromLoop(read_callback_fn fn) = 0;\n  virtual void readImplFromLoop(\n      AbstractNopHolder& object,\n      read_nop_callback_fn fn);\n  virtual void readImplFromLoop(\n      void* ptr,\n      size_t length,\n      read_callback_fn fn) = 0;\n  virtual void writeImplFromLoop(\n      const void* ptr,\n      size_t length,\n      write_callback_fn fn) = 0;\n  virtual void writeImplFromLoop(\n      const AbstractNopHolder& object,\n      write_callback_fn fn);\n  virtual void handleErrorImpl() = 0;\n\n  void setError(Error error);\n\n  const std::shared_ptr<TCtx> context_;\n\n  Error error_{Error::kSuccess};\n\n  // An identifier for the connection, composed of the identifier for the\n  // context or listener, combined with an increasing sequence number. 
It will\n  // only be used for logging and debugging purposes.\n  std::string id_;\n\n private:\n  // Initialize member fields that need `shared_from_this`.\n  void initFromLoop();\n\n  // Queue a read operation.\n  void readFromLoop(read_callback_fn fn);\n  void readFromLoop(AbstractNopHolder& object, read_nop_callback_fn fn);\n  void readFromLoop(void* ptr, size_t length, read_callback_fn fn);\n\n  // Perform a write operation.\n  void writeFromLoop(const void* ptr, size_t length, write_callback_fn fn);\n  void writeFromLoop(const AbstractNopHolder& object, write_callback_fn fn);\n\n  void setIdFromLoop(std::string id);\n\n  // Shut down the connection and its resources.\n  void closeFromLoop();\n\n  // Deal with an error.\n  void handleError();\n\n  // A sequence number for the calls to read and write.\n  uint64_t nextBufferBeingRead_{0};\n  uint64_t nextBufferBeingWritten_{0};\n\n  // Contexts and listeners do sometimes need to call directly into initFromLoop\n  // and closeFromLoop, in order to make sure that some of their operations can\n  // happen \"atomically\" on the connection, without possibly other operations\n  // occurring in between (e.g., an error).\n  friend ContextImplBoilerplate<TCtx, TList, TConn>;\n  friend ListenerImplBoilerplate<TCtx, TList, TConn>;\n};\n\ntemplate <typename TCtx, typename TList, typename TConn>\nConnectionImplBoilerplate<TCtx, TList, TConn>::ConnectionImplBoilerplate(\n    ConstructorToken /* unused */,\n    std::shared_ptr<TCtx> context,\n    std::string id)\n    : context_(std::move(context)), id_(std::move(id)) {}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::init() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->initFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::initFromLoop() {\n  if (context_->closed()) {\n    // Set the error without 
calling setError because we do not want to invoke\n    // the subclass's handleErrorImpl as it would find itself in a weird state\n    // (since initFromLoop wouldn't have been called).\n    error_ = TP_CREATE_ERROR(ConnectionClosedError);\n    TP_VLOG(7) << \"Connection \" << id_ << \" is closing (without initing)\";\n    return;\n  }\n\n  initImplFromLoop();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::read(read_callback_fn fn) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, fn{std::move(fn)}]() mutable {\n        impl->readFromLoop(std::move(fn));\n      });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::readFromLoop(\n    read_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  uint64_t sequenceNumber = nextBufferBeingRead_++;\n  TP_VLOG(7) << \"Connection \" << id_ << \" received a read request (#\"\n             << sequenceNumber << \")\";\n\n  fn = [this, sequenceNumber, fn{std::move(fn)}](\n           const Error& error, const void* ptr, size_t length) {\n    TP_VLOG(7) << \"Connection \" << id_ << \" is calling a read callback (#\"\n               << sequenceNumber << \")\";\n    fn(error, ptr, length);\n    TP_VLOG(7) << \"Connection \" << id_ << \" done calling a read callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    fn(error_, nullptr, 0);\n    return;\n  }\n\n  readImplFromLoop(std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::read(\n    AbstractNopHolder& object,\n    read_nop_callback_fn fn) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, &object, fn{std::move(fn)}]() mutable {\n        impl->readFromLoop(object, std::move(fn));\n      });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, 
TConn>::readFromLoop(\n    AbstractNopHolder& object,\n    read_nop_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  uint64_t sequenceNumber = nextBufferBeingRead_++;\n  TP_VLOG(7) << \"Connection \" << id_ << \" received a nop object read request (#\"\n             << sequenceNumber << \")\";\n\n  fn = [this, sequenceNumber, fn{std::move(fn)}](const Error& error) {\n    TP_VLOG(7) << \"Connection \" << id_\n               << \" is calling a nop object read callback (#\" << sequenceNumber\n               << \")\";\n    fn(error);\n    TP_VLOG(7) << \"Connection \" << id_\n               << \" done calling a nop object read callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    fn(error_);\n    return;\n  }\n\n  readImplFromLoop(object, std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::readImplFromLoop(\n    AbstractNopHolder& object,\n    read_nop_callback_fn fn) {\n  readImplFromLoop([&object, fn{std::move(fn)}](\n                       const Error& error, const void* ptr, size_t len) {\n    if (!error) {\n      NopReader reader(reinterpret_cast<const uint8_t*>(ptr), len);\n      nop::Status<void> status = object.read(reader);\n      TP_THROW_ASSERT_IF(status.has_error())\n          << \"Error reading nop object: \" << status.GetErrorMessage();\n    }\n    fn(error);\n  });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::read(\n    void* ptr,\n    size_t length,\n    read_callback_fn fn) {\n  context_->deferToLoop([impl{this->shared_from_this()},\n                         ptr,\n                         length,\n                         fn{std::move(fn)}]() mutable {\n    impl->readFromLoop(ptr, length, std::move(fn));\n  });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::readFromLoop(\n    void* ptr,\n    
size_t length,\n    read_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  uint64_t sequenceNumber = nextBufferBeingRead_++;\n  TP_VLOG(7) << \"Connection \" << id_ << \" received a read request (#\"\n             << sequenceNumber << \")\";\n\n  fn = [this, sequenceNumber, fn{std::move(fn)}](\n           const Error& error, const void* ptr, size_t length) {\n    TP_VLOG(7) << \"Connection \" << id_ << \" is calling a read callback (#\"\n               << sequenceNumber << \")\";\n    fn(error, ptr, length);\n    TP_VLOG(7) << \"Connection \" << id_ << \" done calling a read callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    fn(error_, ptr, length);\n    return;\n  }\n\n  readImplFromLoop(ptr, length, std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::write(\n    const void* ptr,\n    size_t length,\n    write_callback_fn fn) {\n  context_->deferToLoop([impl{this->shared_from_this()},\n                         ptr,\n                         length,\n                         fn{std::move(fn)}]() mutable {\n    impl->writeFromLoop(ptr, length, std::move(fn));\n  });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::writeFromLoop(\n    const void* ptr,\n    size_t length,\n    write_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  uint64_t sequenceNumber = nextBufferBeingWritten_++;\n  TP_VLOG(7) << \"Connection \" << id_ << \" received a write request (#\"\n             << sequenceNumber << \")\";\n\n  fn = [this, sequenceNumber, fn{std::move(fn)}](const Error& error) {\n    TP_VLOG(7) << \"Connection \" << id_ << \" is calling a write callback (#\"\n               << sequenceNumber << \")\";\n    fn(error);\n    TP_VLOG(7) << \"Connection \" << id_ << \" done calling a write callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    
fn(error_);\n    return;\n  }\n\n  writeImplFromLoop(ptr, length, std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::write(\n    const AbstractNopHolder& object,\n    write_callback_fn fn) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, &object, fn{std::move(fn)}]() mutable {\n        impl->writeFromLoop(object, std::move(fn));\n      });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::writeFromLoop(\n    const AbstractNopHolder& object,\n    write_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  uint64_t sequenceNumber = nextBufferBeingWritten_++;\n  TP_VLOG(7) << \"Connection \" << id_\n             << \" received a nop object write request (#\" << sequenceNumber\n             << \")\";\n\n  fn = [this, sequenceNumber, fn{std::move(fn)}](const Error& error) {\n    TP_VLOG(7) << \"Connection \" << id_\n               << \" is calling a nop object write callback (#\" << sequenceNumber\n               << \")\";\n    fn(error);\n    TP_VLOG(7) << \"Connection \" << id_\n               << \" done calling a nop object write callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    fn(error_);\n    return;\n  }\n\n  writeImplFromLoop(object, std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::writeImplFromLoop(\n    const AbstractNopHolder& object,\n    write_callback_fn fn) {\n  const size_t len = object.getSize();\n\n  // Using a shared_ptr instead of unique_ptr because if the lambda captures a\n  // unique_ptr then it becomes non-copyable, which prevents it from being\n  // converted to a function. In C++20 use std::make_shared<uint8_t[]>(len).\n  //\n  // Note: this is a std::shared_ptr<uint8_t[]> semantically. 
A shared_ptr\n  // with array type is supported in C++17 and higher.\n  //\n  auto buf = std::shared_ptr<uint8_t>(\n      new uint8_t[len], std::default_delete<uint8_t[]>());\n  auto ptr = buf.get();\n\n  NopWriter writer(ptr, len);\n  nop::Status<void> status = object.write(writer);\n  TP_THROW_ASSERT_IF(status.has_error())\n      << \"Error writing nop object: \" << status.GetErrorMessage();\n\n  // Perform write and forward callback.\n  writeImplFromLoop(\n      ptr,\n      len,\n      [buf{std::move(buf)}, fn{std::move(fn)}](const Error& error) mutable {\n        // The write has completed; destroy write buffer.\n        buf.reset();\n        fn(error);\n      });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::setId(std::string id) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, id{std::move(id)}]() mutable {\n        impl->setIdFromLoop(std::move(id));\n      });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::setIdFromLoop(\n    std::string id) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(7) << \"Connection \" << id_ << \" was renamed to \" << id;\n  id_ = std::move(id);\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::close() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->closeFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::closeFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(7) << \"Connection \" << id_ << \" is closing\";\n  setError(TP_CREATE_ERROR(ConnectionClosedError));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::setError(Error error) {\n  // Don't overwrite an error that's already set.\n  if (error_ || !error) {\n    return;\n  }\n\n  
error_ = std::move(error);\n\n  handleError();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ConnectionImplBoilerplate<TCtx, TList, TConn>::handleError() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(8) << \"Connection \" << id_ << \" is handling error \" << error_.what();\n\n  handleErrorImpl();\n}\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/context.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n\nnamespace tensorpipe {\nnamespace transport {\n\nclass Connection;\nclass Listener;\n\nclass Context {\n public:\n  virtual std::shared_ptr<Connection> connect(std::string addr) = 0;\n\n  virtual std::shared_ptr<Listener> listen(std::string addr) = 0;\n\n  // Return whether the context is able to operate correctly.\n  //\n  // Some transport types may be unable to perform as intended under\n  // some circumstances (e.g., specialized hardware unavailable, lack\n  // of permissions). They can report it through this method in order\n  // for the core context to avoid registering them in the first place.\n  //\n  virtual bool isViable() const = 0;\n\n  // Return string to describe the domain for this context.\n  //\n  // Two processes with a context of the same type can connect to each\n  // other if one side's domain descriptor is \"accepted\" by the other\n  // one, using the canCommunicateWithRemote method below. That method\n  // must be symmetric, and unless overridden defaults to string\n  // comparison.\n  //\n  // For example, for a transport that leverages TCP/IP, this may be\n  // as simple as the address family (assuming we can route between\n  // any two processes). For a transport that leverages shared memory,\n  // this descriptor must uniquely identify the machine, such that\n  // only co-located processes generate the same domain descriptor.\n  //\n  virtual const std::string& domainDescriptor() const = 0;\n\n  // Compare local and remote domain descriptor for compatibility.\n  //\n  // Determine whether a connection can be opened between this context\n  // and a remote one that has the given domain descriptor. 
This\n  // function needs to be symmetric: if we called this method on the\n  // remote context with the local descriptor we should get the same\n  // answer. Unless overridden it defaults to string comparison.\n  //\n  virtual bool canCommunicateWithRemote(\n      const std::string& remoteDomainDescriptor) const {\n    return domainDescriptor() == remoteDomainDescriptor;\n  }\n\n  // Tell the context what its identifier is.\n  //\n  // This is only supposed to be called from the high-level context or from\n  // channel contexts. It will only be used for logging and debugging purposes.\n  virtual void setId(std::string id) = 0;\n\n  virtual void close() = 0;\n\n  virtual void join() = 0;\n\n  virtual ~Context() = default;\n};\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/context_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n#include <memory>\n#include <string>\n#include <type_traits>\n#include <utility>\n\n#include <tensorpipe/transport/context.h>\n#include <tensorpipe/transport/context_impl_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\ntemplate <typename TCtx, typename TList, typename TConn>\nclass ContextBoilerplate : public Context {\n public:\n  template <typename... Args>\n  explicit ContextBoilerplate(Args&&... args);\n\n  ContextBoilerplate(const ContextBoilerplate&) = delete;\n  ContextBoilerplate(ContextBoilerplate&&) = delete;\n  ContextBoilerplate& operator=(const ContextBoilerplate&) = delete;\n  ContextBoilerplate& operator=(ContextBoilerplate&&) = delete;\n\n  std::shared_ptr<Connection> connect(std::string addr) override;\n\n  std::shared_ptr<Listener> listen(std::string addr) override;\n\n  bool isViable() const override;\n\n  const std::string& domainDescriptor() const override;\n\n  void setId(std::string id) override;\n\n  void close() override;\n\n  void join() override;\n\n  ~ContextBoilerplate() override;\n\n protected:\n  // The implementation is managed by a shared_ptr because each child object\n  // will also hold a shared_ptr to it (downcast as a shared_ptr to the private\n  // interface). However, its lifetime is tied to the one of this public object,\n  // since when the latter is destroyed the implementation is closed and joined.\n  const std::shared_ptr<TCtx> impl_;\n};\n\ntemplate <typename TCtx, typename TList, typename TConn>\ntemplate <typename... Args>\nContextBoilerplate<TCtx, TList, TConn>::ContextBoilerplate(Args&&... 
args)\n    : impl_(TCtx::create(std::forward<Args>(args)...)) {\n  static_assert(\n      std::is_base_of<ContextImplBoilerplate<TCtx, TList, TConn>, TCtx>::value,\n      \"\");\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->init();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nstd::shared_ptr<Connection> ContextBoilerplate<TCtx, TList, TConn>::connect(\n    std::string addr) {\n  if (unlikely(!impl_)) {\n    return std::make_shared<ConnectionBoilerplate<TCtx, TList, TConn>>(nullptr);\n  }\n  return impl_->connect(std::move(addr));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nstd::shared_ptr<Listener> ContextBoilerplate<TCtx, TList, TConn>::listen(\n    std::string addr) {\n  if (unlikely(!impl_)) {\n    return std::make_shared<ListenerBoilerplate<TCtx, TList, TConn>>(nullptr);\n  }\n  return impl_->listen(std::move(addr));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nbool ContextBoilerplate<TCtx, TList, TConn>::isViable() const {\n  return impl_ != nullptr;\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nconst std::string& ContextBoilerplate<TCtx, TList, TConn>::domainDescriptor()\n    const {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static std::string empty = \"\";\n    return empty;\n  }\n  return impl_->domainDescriptor();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextBoilerplate<TCtx, TList, TConn>::setId(std::string id) {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->setId(std::move(id));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextBoilerplate<TCtx, TList, TConn>::close() {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->close();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextBoilerplate<TCtx, TList, TConn>::join() {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->join();\n}\n\ntemplate <typename 
TCtx, typename TList, typename TConn>\nContextBoilerplate<TCtx, TList, TConn>::~ContextBoilerplate() {\n  join();\n}\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/context_impl_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <future>\n#include <memory>\n#include <string>\n#include <unordered_map>\n#include <utility>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/transport/connection_boilerplate.h>\n#include <tensorpipe/transport/listener_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\ntemplate <typename TCtx, typename TList, typename TConn>\nclass ContextImplBoilerplate : public virtual DeferredExecutor,\n                               public std::enable_shared_from_this<TCtx> {\n public:\n  explicit ContextImplBoilerplate(std::string domainDescriptor);\n\n  ContextImplBoilerplate(const ContextImplBoilerplate&) = delete;\n  ContextImplBoilerplate(ContextImplBoilerplate&&) = delete;\n  ContextImplBoilerplate& operator=(const ContextImplBoilerplate&) = delete;\n  ContextImplBoilerplate& operator=(ContextImplBoilerplate&&) = delete;\n\n  void init();\n\n  std::shared_ptr<Connection> connect(std::string addr);\n\n  std::shared_ptr<Listener> listen(std::string addr);\n\n  const std::string& domainDescriptor() const;\n\n  // Enrolling dependent objects (listeners and connections) causes them to be\n  // kept alive for as long as the context exists. These objects should enroll\n  // themselves as soon as they're created (in their initImplFromLoop method)\n  // and unenroll themselves after they've completed handling an error (either\n  // right in the handleErrorImpl method or in a subsequent callback). 
The\n  // context, on the other hand, should avoid terminating (i.e., complete\n  // joining) until all objects have unenrolled themselves.\n  void enroll(TList& listener);\n  void enroll(TConn& connection);\n  void unenroll(TList& listener);\n  void unenroll(TConn& connection);\n\n  // Return whether the context is in a closed state. To avoid race conditions,\n  // this must be called from within the loop.\n  bool closed();\n\n  void setId(std::string id);\n\n  void close();\n\n  void join();\n\n  virtual ~ContextImplBoilerplate() = default;\n\n protected:\n  virtual void initImplFromLoop() {}\n  virtual void handleErrorImpl() = 0;\n  virtual void joinImpl() = 0;\n\n  void setError(Error error);\n\n  Error error_{Error::kSuccess};\n\n  // An identifier for the context, composed of the identifier for the context,\n  // combined with the transport's name. It will only be used for logging and\n  // debugging purposes.\n  std::string id_{\"N/A\"};\n\n  CallbackWrapper<TCtx> callbackWrapper_{*this, *this};\n\n private:\n  void initFromLoop();\n  void closeFromLoop();\n\n  void handleError();\n\n  std::atomic<bool> joined_{false};\n\n  const std::string domainDescriptor_;\n\n  // Sequence numbers for the listeners and connections created by this context,\n  // used to create their identifiers based off this context's identifier. They\n  // will only be used for logging and debugging.\n  std::atomic<uint64_t> listenerCounter_{0};\n  std::atomic<uint64_t> connectionCounter_{0};\n\n  // Store shared_ptrs to dependent objects that have enrolled themselves to\n  // keep them alive. 
We use a map, indexed by raw pointers, rather than a set\n  // of shared_ptrs so that we can erase objects without them having to create\n  // a fresh shared_ptr just for that.\n  std::unordered_map<TList*, std::shared_ptr<TList>> listeners_;\n  std::unordered_map<TConn*, std::shared_ptr<TConn>> connections_;\n\n  // For some odd reason it seems we need to use a qualified name here...\n  template <typename T>\n  friend class tensorpipe::CallbackWrapper;\n};\n\ntemplate <typename TCtx, typename TList, typename TConn>\nContextImplBoilerplate<TCtx, TList, TConn>::ContextImplBoilerplate(\n    std::string domainDescriptor)\n    : domainDescriptor_(std::move(domainDescriptor)) {}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::init() {\n  deferToLoop([this]() { initFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::initFromLoop() {\n  TP_DCHECK(inLoop());\n\n  TP_DCHECK(!error_);\n\n  initImplFromLoop();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nstd::shared_ptr<Connection> ContextImplBoilerplate<TCtx, TList, TConn>::connect(\n    std::string addr) {\n  std::string connectionId = id_ + \".c\" + std::to_string(connectionCounter_++);\n  TP_VLOG(7) << \"Transport context \" << id_ << \" is opening connection \"\n             << connectionId << \" to address \" << addr;\n  return std::make_shared<ConnectionBoilerplate<TCtx, TList, TConn>>(\n      typename ConnectionImplBoilerplate<TCtx, TList, TConn>::\n          ConstructorToken(),\n      this->shared_from_this(),\n      std::move(connectionId),\n      std::move(addr));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nstd::shared_ptr<Listener> ContextImplBoilerplate<TCtx, TList, TConn>::listen(\n    std::string addr) {\n  std::string listenerId = id_ + \".l\" + std::to_string(listenerCounter_++);\n  TP_VLOG(7) << \"Transport context \" << id_ << \" is 
opening listener \"\n             << listenerId << \" on address \" << addr;\n  return std::make_shared<ListenerBoilerplate<TCtx, TList, TConn>>(\n      typename ListenerImplBoilerplate<TCtx, TList, TConn>::ConstructorToken(),\n      this->shared_from_this(),\n      std::move(listenerId),\n      std::move(addr));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nconst std::string& ContextImplBoilerplate<TCtx, TList, TConn>::\n    domainDescriptor() const {\n  return domainDescriptor_;\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::enroll(TList& listener) {\n  TP_DCHECK(inLoop());\n  bool wasInserted;\n  std::tie(std::ignore, wasInserted) =\n      listeners_.emplace(&listener, listener.shared_from_this());\n  TP_DCHECK(wasInserted);\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::enroll(TConn& connection) {\n  TP_DCHECK(inLoop());\n  bool wasInserted;\n  std::tie(std::ignore, wasInserted) =\n      connections_.emplace(&connection, connection.shared_from_this());\n  TP_DCHECK(wasInserted);\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::unenroll(TList& listener) {\n  TP_DCHECK(inLoop());\n  auto numRemoved = listeners_.erase(&listener);\n  TP_DCHECK_EQ(numRemoved, 1);\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::unenroll(TConn& connection) {\n  TP_DCHECK(inLoop());\n  auto numRemoved = connections_.erase(&connection);\n  TP_DCHECK_EQ(numRemoved, 1);\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nbool ContextImplBoilerplate<TCtx, TList, TConn>::closed() {\n  TP_DCHECK(inLoop());\n  return error_;\n};\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::setId(std::string id) {\n  TP_VLOG(7) << \"Transport context \" << id_ 
<< \" was renamed to \" << id;\n  id_ = std::move(id);\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::close() {\n  deferToLoop([this]() { closeFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::closeFromLoop() {\n  TP_DCHECK(inLoop());\n  TP_VLOG(7) << \"Transport context \" << id_ << \" is closing\";\n  setError(TP_CREATE_ERROR(ContextClosedError));\n  TP_VLOG(7) << \"Transport context \" << id_ << \" done closing\";\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::setError(Error error) {\n  // Don't overwrite an error that's already set.\n  if (error_ || !error) {\n    return;\n  }\n\n  error_ = std::move(error);\n\n  handleError();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::handleError() {\n  TP_DCHECK(inLoop());\n  TP_VLOG(8) << \"Transport context \" << id_ << \" is handling error \"\n             << error_.what();\n\n  // Make a copy as they could unenroll themselves inline.\n  auto listenersCopy = listeners_;\n  auto connectionsCopy = connections_;\n  // We call closeFromLoop, rather than just close, because we need these\n  // objects to transition _immediately_ to error, \"atomically\". 
If we just\n  // deferred closing to later, this could come after some already-enqueued\n  // operations that could try to access the context, which would be closed,\n  // and this could fail.\n  for (auto& iter : listenersCopy) {\n    iter.second->closeFromLoop();\n  }\n  for (auto& iter : connectionsCopy) {\n    iter.second->closeFromLoop();\n  }\n\n  handleErrorImpl();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ContextImplBoilerplate<TCtx, TList, TConn>::join() {\n  close();\n\n  if (!joined_.exchange(true)) {\n    TP_VLOG(7) << \"Transport context \" << id_ << \" is joining\";\n\n    // As closing is deferred to the loop, we must wait for closeImpl to be\n    // actually called before we call joinImpl, to avoid race conditions. For\n    // this, we defer another task to the loop, which we know will run after the\n    // closing, and then we wait for that task to be run.\n    std::promise<void> hasClosed;\n    deferToLoop([&]() { hasClosed.set_value(); });\n    hasClosed.get_future().wait();\n\n    joinImpl();\n\n    TP_VLOG(7) << \"Transport context \" << id_ << \" done joining\";\n\n    TP_DCHECK(listeners_.empty());\n    TP_DCHECK(connections_.empty());\n  }\n}\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/error.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/error.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\nstd::string ContextClosedError::what() const {\n  return \"context closed\";\n}\n\nstd::string ListenerClosedError::what() const {\n  return \"listener closed\";\n}\n\nstd::string ConnectionClosedError::what() const {\n  return \"connection closed\";\n}\n\nstd::string ContextNotViableError::what() const {\n  return \"context not viable\";\n}\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/error.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n\n#include <tensorpipe/common/error.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\nclass ContextClosedError final : public BaseError {\n public:\n  ContextClosedError() {}\n\n  std::string what() const override;\n};\n\nclass ListenerClosedError final : public BaseError {\n public:\n  ListenerClosedError() {}\n\n  std::string what() const override;\n};\n\nclass ConnectionClosedError final : public BaseError {\n public:\n  ConnectionClosedError() {}\n\n  std::string what() const override;\n};\n\nclass ContextNotViableError final : public BaseError {\n public:\n  ContextNotViableError() {}\n\n  std::string what() const override;\n};\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/connection_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/connection_impl.h>\n\n#include <string.h>\n\n#include <deque>\n#include <vector>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/common/ibv.h>\n#include <tensorpipe/common/memory.h>\n#include <tensorpipe/common/ringbuffer_read_write_ops.h>\n#include <tensorpipe/common/ringbuffer_role.h>\n#include <tensorpipe/common/socket.h>\n#include <tensorpipe/transport/error.h>\n#include <tensorpipe/transport/ibv/constants.h>\n#include <tensorpipe/transport/ibv/context_impl.h>\n#include <tensorpipe/transport/ibv/error.h>\n#include <tensorpipe/transport/ibv/reactor.h>\n#include <tensorpipe/transport/ibv/sockaddr.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nnamespace {\n\n// The data that each queue pair endpoint needs to send to the other endpoint in\n// order to set up the queue pair itself. 
This data is transferred over a TCP\n// connection.\nstruct Exchange {\n  IbvSetupInformation setupInfo;\n  uint64_t memoryRegionPtr;\n  uint32_t memoryRegionKey;\n};\n\n} // namespace\n\nConnectionImpl::ConnectionImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    Socket socket)\n    : ConnectionImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      socket_(std::move(socket)) {}\n\nConnectionImpl::ConnectionImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::string addr)\n    : ConnectionImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      sockaddr_(Sockaddr::createInetSockAddr(addr)) {}\n\nvoid ConnectionImpl::initImplFromLoop() {\n  context_->enroll(*this);\n\n  Error error;\n  // The connection either got a socket or an address, but not both.\n  TP_DCHECK(socket_.hasValue() ^ sockaddr_.has_value());\n  if (!socket_.hasValue()) {\n    std::tie(error, socket_) =\n        Socket::createForFamily(sockaddr_->addr()->sa_family);\n    if (error) {\n      setError(std::move(error));\n      return;\n    }\n    error = socket_.reuseAddr(true);\n    if (error) {\n      setError(std::move(error));\n      return;\n    }\n    error = socket_.connect(sockaddr_.value());\n    if (error) {\n      setError(std::move(error));\n      return;\n    }\n  }\n  // Ensure underlying control socket is non-blocking such that it\n  // works well with event driven I/O.\n  error = socket_.block(false);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n\n  // Create ringbuffer for inbox.\n  std::tie(error, inboxBuf_) = MmappedPtr::create(\n      kBufferSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1);\n  TP_THROW_ASSERT_IF(error)\n      << \"Couldn't allocate ringbuffer for 
connection inbox: \" << error.what();\n  inboxRb_ =\n      RingBuffer<kNumInboxRingbufferRoles>(&inboxHeader_, inboxBuf_.ptr());\n  inboxMr_ = createIbvMemoryRegion(\n      context_->getReactor().getIbvLib(),\n      context_->getReactor().getIbvPd(),\n      inboxBuf_.ptr(),\n      kBufferSize,\n      IbvLib::ACCESS_LOCAL_WRITE | IbvLib::ACCESS_REMOTE_WRITE);\n\n  // Create ringbuffer for outbox.\n  std::tie(error, outboxBuf_) = MmappedPtr::create(\n      kBufferSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1);\n  TP_THROW_ASSERT_IF(error)\n      << \"Couldn't allocate ringbuffer for connection outbox: \" << error.what();\n  outboxRb_ =\n      RingBuffer<kNumOutboxRingbufferRoles>(&outboxHeader_, outboxBuf_.ptr());\n  outboxMr_ = createIbvMemoryRegion(\n      context_->getReactor().getIbvLib(),\n      context_->getReactor().getIbvPd(),\n      outboxBuf_.ptr(),\n      kBufferSize,\n      0);\n\n  // Create and init queue pair.\n  {\n    IbvLib::qp_init_attr initAttr;\n    std::memset(&initAttr, 0, sizeof(initAttr));\n    initAttr.qp_type = IbvLib::QPT_RC;\n    initAttr.send_cq = context_->getReactor().getIbvCq().get();\n    initAttr.recv_cq = context_->getReactor().getIbvCq().get();\n    initAttr.cap.max_send_wr = kSendQueueSize;\n    initAttr.cap.max_send_sge = 1;\n    initAttr.srq = context_->getReactor().getIbvSrq().get();\n    initAttr.sq_sig_all = 1;\n    qp_ = createIbvQueuePair(\n        context_->getReactor().getIbvLib(),\n        context_->getReactor().getIbvPd(),\n        initAttr);\n  }\n  transitionIbvQueuePairToInit(\n      context_->getReactor().getIbvLib(),\n      qp_,\n      context_->getReactor().getIbvAddress());\n\n  // Register methods to be called when our peer writes to our inbox and reads\n  // from our outbox.\n  context_->getReactor().registerQp(qp_->qp_num, shared_from_this());\n\n  // We're sending address first, so wait for writability.\n  state_ = SEND_ADDR;\n  context_->registerDescriptor(socket_.fd(), EPOLLOUT, 
shared_from_this());\n}\n\nvoid ConnectionImpl::readImplFromLoop(read_callback_fn fn) {\n  readOperations_.emplace_back(std::move(fn));\n\n  // If the inbox already contains some data, we may be able to process this\n  // operation right away.\n  processReadOperationsFromLoop();\n}\n\nvoid ConnectionImpl::readImplFromLoop(\n    AbstractNopHolder& object,\n    read_nop_callback_fn fn) {\n  readOperations_.emplace_back(\n      &object,\n      [fn{std::move(fn)}](\n          const Error& error, const void* /* unused */, size_t /* unused */) {\n        fn(error);\n      });\n\n  // If the inbox already contains some data, we may be able to process this\n  // operation right away.\n  processReadOperationsFromLoop();\n}\n\nvoid ConnectionImpl::readImplFromLoop(\n    void* ptr,\n    size_t length,\n    read_callback_fn fn) {\n  readOperations_.emplace_back(ptr, length, std::move(fn));\n\n  // If the inbox already contains some data, we may be able to process this\n  // operation right away.\n  processReadOperationsFromLoop();\n}\n\nvoid ConnectionImpl::writeImplFromLoop(\n    const void* ptr,\n    size_t length,\n    write_callback_fn fn) {\n  writeOperations_.emplace_back(ptr, length, std::move(fn));\n\n  // If the outbox has some free space, we may be able to process this operation\n  // right away.\n  processWriteOperationsFromLoop();\n}\n\nvoid ConnectionImpl::writeImplFromLoop(\n    const AbstractNopHolder& object,\n    write_callback_fn fn) {\n  writeOperations_.emplace_back(&object, std::move(fn));\n\n  // If the outbox has some free space, we may be able to process this operation\n  // right away.\n  processWriteOperationsFromLoop();\n}\n\nvoid ConnectionImpl::handleEventsFromLoop(int events) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_ << \" is handling an event on its socket (\"\n             << EpollLoop::formatEpollEvents(events) << \")\";\n\n  // Handle only one of the events in the mask. 
Events on the control\n  // file descriptor are rare enough for the cost of having epoll call\n  // into this function multiple times to not matter. The benefit is\n  // that every handler can close and unregister the control file\n  // descriptor from the event loop, without worrying about the next\n  // handler trying to do so as well.\n  // In some cases the socket could be in a state where it's both in an error\n  // state and readable/writable. If we checked for EPOLLIN or EPOLLOUT first\n  // and then returned after handling them, we would keep doing so forever and\n  // never reach the error handling. So we should keep the error check first.\n  if (events & EPOLLERR) {\n    int error;\n    socklen_t errorlen = sizeof(error);\n    int rv = getsockopt(\n        socket_.fd(),\n        SOL_SOCKET,\n        SO_ERROR,\n        reinterpret_cast<void*>(&error),\n        &errorlen);\n    if (rv == -1) {\n      setError(TP_CREATE_ERROR(SystemError, \"getsockopt\", rv));\n    } else {\n      setError(TP_CREATE_ERROR(SystemError, \"async error on socket\", error));\n    }\n    return;\n  }\n  if (events & EPOLLIN) {\n    handleEventInFromLoop();\n    return;\n  }\n  if (events & EPOLLOUT) {\n    handleEventOutFromLoop();\n    return;\n  }\n  // Check for hangup last, as there could be cases where we get EPOLLHUP but\n  // there's still data to be read from the socket, so we want to deal with that\n  // before dealing with the hangup.\n  if (events & EPOLLHUP) {\n    setError(TP_CREATE_ERROR(EOFError));\n    return;\n  }\n}\n\nvoid ConnectionImpl::handleEventInFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  if (state_ == RECV_ADDR) {\n    struct Exchange ex;\n\n    auto err = socket_.read(&ex, sizeof(ex));\n    // Crossing our fingers that the exchange information is small enough that\n    // it can be read in a single chunk.\n    if (err != sizeof(ex)) {\n      setError(TP_CREATE_ERROR(ShortReadError, sizeof(ex), err));\n      return;\n    }\n\n    
transitionIbvQueuePairToReadyToReceive(\n        context_->getReactor().getIbvLib(),\n        qp_,\n        context_->getReactor().getIbvAddress(),\n        ex.setupInfo);\n    transitionIbvQueuePairToReadyToSend(\n        context_->getReactor().getIbvLib(), qp_);\n\n    peerInboxKey_ = ex.memoryRegionKey;\n    peerInboxPtr_ = ex.memoryRegionPtr;\n\n    // The connection is usable now.\n    state_ = ESTABLISHED;\n    processWriteOperationsFromLoop();\n    // Trigger read operations in case a pair of local read() and remote\n    // write() happened before connection is established. Otherwise the read()\n    // callback would be lost if it's the only read() request.\n    processReadOperationsFromLoop();\n    return;\n  }\n\n  if (state_ == ESTABLISHED) {\n    // We don't expect to read anything on this socket once the\n    // connection has been established. If we do, assume it's a\n    // zero-byte read indicating EOF.\n    setError(TP_CREATE_ERROR(EOFError));\n    return;\n  }\n\n  TP_THROW_ASSERT() << "EPOLLIN event not handled in state " << state_;\n}\n\nvoid ConnectionImpl::handleEventOutFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  if (state_ == SEND_ADDR) {\n    Exchange ex;\n    ex.setupInfo =\n        makeIbvSetupInformation(context_->getReactor().getIbvAddress(), qp_);\n    ex.memoryRegionPtr = reinterpret_cast<uint64_t>(inboxBuf_.ptr());\n    ex.memoryRegionKey = inboxMr_->rkey;\n\n    auto err = socket_.write(reinterpret_cast<void*>(&ex), sizeof(ex));\n    // Crossing our fingers that the exchange information is small enough that\n    // it can be written in a single chunk.\n    if (err != sizeof(ex)) {\n      setError(TP_CREATE_ERROR(ShortWriteError, sizeof(ex), err));\n      return;\n    }\n\n    // Sent our address. 
Wait for address from peer.\n    state_ = RECV_ADDR;\n    context_->registerDescriptor(socket_.fd(), EPOLLIN, shared_from_this());\n    return;\n  }\n\n  TP_THROW_ASSERT() << "EPOLLOUT event not handled in state " << state_;\n}\n\nvoid ConnectionImpl::processReadOperationsFromLoop() {\n  TP_DCHECK(context_->inLoop());\n\n  // Process all read operations that we can immediately serve, only\n  // when connection is established.\n  if (state_ != ESTABLISHED) {\n    return;\n  }\n  // Serve read operations\n  InboxConsumer inboxConsumer(inboxRb_);\n  while (!readOperations_.empty()) {\n    RingbufferReadOperation& readOperation = readOperations_.front();\n    ssize_t len = readOperation.handleRead(inboxConsumer);\n    if (len > 0) {\n      Reactor::AckInfo info;\n      info.length = len;\n\n      TP_VLOG(9) << "Connection " << id_\n                 << " is posting a send request (acknowledging " << info.length\n                 << " bytes) on QP " << qp_->qp_num;\n      context_->getReactor().postAck(qp_, info);\n      numAcksInFlight_++;\n    }\n    if (readOperation.completed()) {\n      readOperations_.pop_front();\n    } else {\n      break;\n    }\n  }\n}\n\nvoid ConnectionImpl::processWriteOperationsFromLoop() {\n  TP_DCHECK(context_->inLoop());\n\n  if (state_ != ESTABLISHED) {\n    return;\n  }\n\n  OutboxProducer outboxProducer(outboxRb_);\n  while (!writeOperations_.empty()) {\n    RingbufferWriteOperation& writeOperation = writeOperations_.front();\n    ssize_t len = writeOperation.handleWrite(outboxProducer);\n    if (len > 0) {\n      ssize_t ret;\n      OutboxIbvWriter outboxConsumer(outboxRb_);\n\n      ret = outboxConsumer.startTx();\n      TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n      ssize_t numBuffers;\n      std::array<OutboxIbvWriter::Buffer, 2> buffers;\n\n      std::tie(numBuffers, buffers) =\n          outboxConsumer.accessContiguousInTx</*AllowPartial=*/false>(len);\n      TP_THROW_SYSTEM_IF(numBuffers < 0, -numBuffers);\n\n      for 
(int bufferIdx = 0; bufferIdx < numBuffers; bufferIdx++) {\n        Reactor::WriteInfo info;\n        info.addr = buffers[bufferIdx].ptr;\n        info.length = buffers[bufferIdx].len;\n        info.lkey = outboxMr_->lkey;\n\n        uint64_t peerInboxOffset = peerInboxHead_ & (kBufferSize - 1);\n        peerInboxHead_ += buffers[bufferIdx].len;\n\n        info.remoteAddr = peerInboxPtr_ + peerInboxOffset;\n        info.rkey = peerInboxKey_;\n\n        TP_VLOG(9) << \"Connection \" << id_\n                   << \" is posting a RDMA write request (transmitting \"\n                   << info.length << \" bytes) on QP \" << qp_->qp_num;\n        context_->getReactor().postWrite(qp_, info);\n        numWritesInFlight_++;\n      }\n\n      ret = outboxConsumer.commitTx();\n      TP_THROW_SYSTEM_IF(ret < 0, -ret);\n    }\n    if (writeOperation.completed()) {\n      writeOperations_.pop_front();\n    } else {\n      break;\n    }\n  }\n}\n\nvoid ConnectionImpl::onRemoteProducedData(uint32_t length) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_ << \" was signalled that \" << length\n             << \" bytes were written to its inbox on QP \" << qp_->qp_num;\n\n  ssize_t ret;\n  InboxIbvRecver inboxProducer(inboxRb_);\n\n  ret = inboxProducer.startTx();\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  ret = inboxProducer.incMarkerInTx(length);\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  ret = inboxProducer.commitTx();\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  processReadOperationsFromLoop();\n}\n\nvoid ConnectionImpl::onRemoteConsumedData(uint32_t length) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_ << \" was signalled that \" << length\n             << \" bytes were read from its outbox on QP \" << qp_->qp_num;\n  ssize_t ret;\n  OutboxIbvAcker outboxConsumer(outboxRb_);\n\n  ret = outboxConsumer.startTx();\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  ret = outboxConsumer.incMarkerInTx(length);\n  TP_THROW_SYSTEM_IF(ret 
< 0, -ret);\n\n  ret = outboxConsumer.commitTx();\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  processWriteOperationsFromLoop();\n}\n\nvoid ConnectionImpl::onWriteCompleted() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_\n             << \" done posting a RDMA write request on QP \" << qp_->qp_num;\n  numWritesInFlight_--;\n  tryCleanup();\n}\n\nvoid ConnectionImpl::onAckCompleted() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_ << \" done posting a send request on QP \"\n             << qp_->qp_num;\n  numAcksInFlight_--;\n  tryCleanup();\n}\n\nvoid ConnectionImpl::onError(IbvLib::wc_status status, uint64_t wrId) {\n  TP_DCHECK(context_->inLoop());\n  setError(TP_CREATE_ERROR(\n      IbvError, context_->getReactor().getIbvLib().wc_status_str(status)));\n  if (wrId == kWriteRequestId) {\n    onWriteCompleted();\n  } else if (wrId == kAckRequestId) {\n    onAckCompleted();\n  }\n}\n\nvoid ConnectionImpl::handleErrorImpl() {\n  for (auto& readOperation : readOperations_) {\n    readOperation.handleError(error_);\n  }\n  readOperations_.clear();\n  for (auto& writeOperation : writeOperations_) {\n    writeOperation.handleError(error_);\n  }\n  writeOperations_.clear();\n\n  transitionIbvQueuePairToError(context_->getReactor().getIbvLib(), qp_);\n\n  tryCleanup();\n\n  if (socket_.hasValue()) {\n    if (state_ > INITIALIZING) {\n      context_->unregisterDescriptor(socket_.fd());\n    }\n    socket_.reset();\n  }\n\n  context_->unenroll(*this);\n}\n\nvoid ConnectionImpl::tryCleanup() {\n  TP_DCHECK(context_->inLoop());\n  // Setting the queue pair to an error state will cause all its work requests\n  // (both those that had started being served, and those that hadn't; including\n  // those from a shared receive queue) to be flushed. We need to wait for the\n  // completion events of all those requests to be retrieved from the completion\n  // queue before we can destroy the queue pair. 
We can do so by deferring the\n  // destruction to the loop, since the reactor will only proceed to invoke\n  // deferred functions once it doesn't have any completion events to handle.\n  // However the RDMA writes and the sends may be queued up inside the reactor\n  // and thus may not have even been scheduled yet, so we explicitly wait for\n  // them to complete.\n  if (error_) {\n    if (numWritesInFlight_ == 0 && numAcksInFlight_ == 0) {\n      TP_VLOG(8) << \"Connection \" << id_ << \" is ready to clean up\";\n      context_->deferToLoop([impl{shared_from_this()}]() { impl->cleanup(); });\n    } else {\n      TP_VLOG(9) << \"Connection \" << id_\n                 << \" cannot proceed to cleanup because it has \"\n                 << numWritesInFlight_ << \" pending RDMA write requests and \"\n                 << numAcksInFlight_ << \" pending send requests on QP \"\n                 << qp_->qp_num;\n    }\n  }\n}\n\nvoid ConnectionImpl::cleanup() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(8) << \"Connection \" << id_ << \" is cleaning up\";\n\n  context_->getReactor().unregisterQp(qp_->qp_num);\n\n  qp_.reset();\n  inboxMr_.reset();\n  inboxBuf_.reset();\n  outboxMr_.reset();\n  outboxBuf_.reset();\n}\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/connection_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string.h>\n\n#include <deque>\n#include <memory>\n#include <string>\n\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/ibv.h>\n#include <tensorpipe/common/memory.h>\n#include <tensorpipe/common/nop.h>\n#include <tensorpipe/common/ringbuffer.h>\n#include <tensorpipe/common/ringbuffer_read_write_ops.h>\n#include <tensorpipe/common/socket.h>\n#include <tensorpipe/transport/connection_impl_boilerplate.h>\n#include <tensorpipe/transport/ibv/reactor.h>\n#include <tensorpipe/transport/ibv/sockaddr.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nclass ContextImpl;\nclass ListenerImpl;\n\nclass ConnectionImpl final : public ConnectionImplBoilerplate<\n                                 ContextImpl,\n                                 ListenerImpl,\n                                 ConnectionImpl>,\n                             public EpollLoop::EventHandler,\n                             public IbvEventHandler {\n  constexpr static size_t kBufferSize = 2 * 1024 * 1024;\n\n  constexpr static int kNumOutboxRingbufferRoles = 3;\n  using OutboxIbvAcker = RingBufferRole<kNumOutboxRingbufferRoles, 0>;\n  using OutboxIbvWriter = RingBufferRole<kNumOutboxRingbufferRoles, 1>;\n  using OutboxProducer = RingBufferRole<kNumOutboxRingbufferRoles, 2>;\n\n  constexpr static int kNumInboxRingbufferRoles = 2;\n  using InboxConsumer = RingBufferRole<kNumInboxRingbufferRoles, 0>;\n  using InboxIbvRecver = RingBufferRole<kNumInboxRingbufferRoles, 1>;\n\n  enum State {\n    INITIALIZING = 1,\n    SEND_ADDR,\n    RECV_ADDR,\n    ESTABLISHED,\n  };\n\n public:\n  // Create a connection that is already connected (e.g. 
from a listener).\n  ConnectionImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      Socket socket);\n\n  // Create a connection that connects to the specified address.\n  ConnectionImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::string addr);\n\n  // Implementation of EventHandler.\n  void handleEventsFromLoop(int events) override;\n\n  // Implementation of IbvEventHandler.\n  void onRemoteProducedData(uint32_t length) override;\n  void onRemoteConsumedData(uint32_t length) override;\n  void onWriteCompleted() override;\n  void onAckCompleted() override;\n  void onError(IbvLib::wc_status status, uint64_t wrId) override;\n\n protected:\n  // Implement the entry points called by ConnectionImplBoilerplate.\n  void initImplFromLoop() override;\n  void readImplFromLoop(read_callback_fn fn) override;\n  void readImplFromLoop(AbstractNopHolder& object, read_nop_callback_fn fn)\n      override;\n  void readImplFromLoop(void* ptr, size_t length, read_callback_fn fn) override;\n  void writeImplFromLoop(const void* ptr, size_t length, write_callback_fn fn)\n      override;\n  void writeImplFromLoop(const AbstractNopHolder& object, write_callback_fn fn)\n      override;\n  void handleErrorImpl() override;\n\n private:\n  // Handle events of type EPOLLIN on the UNIX domain socket.\n  //\n  // The only data that is expected on that socket is the address and other\n  // setup information for the other side's queue pair and inbox.\n  void handleEventInFromLoop();\n\n  // Handle events of type EPOLLOUT on the UNIX domain socket.\n  //\n  // Once the socket is writable we send the address and other setup information\n  // for this side's queue pair and inbox.\n  void handleEventOutFromLoop();\n\n  State state_{INITIALIZING};\n  Socket socket_;\n  optional<Sockaddr> sockaddr_;\n\n  IbvQueuePair qp_;\n\n  // Inbox.\n  // Initialize header during construction 
because it isn't assignable.\n  RingBufferHeader<kNumInboxRingbufferRoles> inboxHeader_{kBufferSize};\n  // Use mmapped memory so it's page-aligned (and, one day, to use huge pages).\n  MmappedPtr inboxBuf_;\n  RingBuffer<kNumInboxRingbufferRoles> inboxRb_;\n  IbvMemoryRegion inboxMr_;\n\n  // Outbox.\n  // Initialize header during construction because it isn't assignable.\n  RingBufferHeader<kNumOutboxRingbufferRoles> outboxHeader_{kBufferSize};\n  // Use mmapped memory so it's page-aligned (and, one day, to use huge pages).\n  MmappedPtr outboxBuf_;\n  RingBuffer<kNumOutboxRingbufferRoles> outboxRb_;\n  IbvMemoryRegion outboxMr_;\n\n  // Peer inbox key, pointer and head.\n  uint32_t peerInboxKey_{0};\n  uint64_t peerInboxPtr_{0};\n  uint64_t peerInboxHead_{0};\n\n  // The connection performs two types of send requests: writing to the remote\n  // inbox, or acknowledging a write into its own inbox. These send operations\n  // could be delayed and stalled by the reactor as only a limited number of\n  // work requests can be outstanding at the same time globally. Thus we keep\n  // count of how many we have pending to make sure they have all completed or\n  // flushed when we close, and that none is stuck in the pipeline.\n  uint32_t numWritesInFlight_{0};\n  uint32_t numAcksInFlight_{0};\n\n  // Pending read operations.\n  std::deque<RingbufferReadOperation> readOperations_;\n\n  // Pending write operations.\n  std::deque<RingbufferWriteOperation> writeOperations_;\n\n  // Process pending read operations if in an operational state.\n  //\n  // This may be triggered by the other side of the connection (by pushing this\n  // side's inbox token to the reactor) when it has written some new data to its\n  // outbox (which is this side's inbox). 
It is also called by this connection\n  // when it moves into an established state or when a new read operation is\n  // queued, in case data was already available before this connection was ready\n  // to consume it.\n  void processReadOperationsFromLoop();\n\n  // Process pending write operations if in an operational state.\n  //\n  // This may be triggered by the other side of the connection (by pushing this\n  // side's outbox token to the reactor) when it has read some data from its\n  // inbox (which is this side's outbox). This is important when some of this\n  // side's writes couldn't complete because the outbox was full, and thus they\n  // needed to wait for some of its data to be read. This method is also called\n  // by this connection when it moves into an established state, in case some\n  // writes were queued before the connection was ready to process them, or when\n  // a new write operation is queued.\n  void processWriteOperationsFromLoop();\n\n  void tryCleanup();\n  void cleanup();\n};\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/constants.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstdint>\n\nnamespace {\n\n// We should probably allow these to be user-configured. But, for now, we'll set\n// them to the lowest value they can have, the rationale being that this way\n// they will always be valid.\nconstexpr uint8_t kPortNum = 1;\nconstexpr uint8_t kGlobalIdentifierIndex = 0;\n\n// FIXME Instead of hardcoding the next three values, we could use\n// ibv_query_device to obtain max_cqe, max_qp_wr and max_srq_wr and deduce from\n// them the maximum allowed values for these parameters.\n\n// How many simultaneous receive requests to keep queued on the shared receive\n// queue. Incoming RDMA writes and sends will consume one such request. The\n// reactor loop will fill the SRQ back up to this value once some requests\n// complete. So this number should just be large enough to accommodate all the\n// requests that could finish between two reactor loop iterations. And, even if\n// this number ends up being too low, the excess incoming requests will just\n// retry, causing a performance penalty but not a failure.\nconstexpr uint32_t kNumPendingRecvReqs = 1024;\n\n// How many RDMA write requests can be pending at the same time across all\n// connections. We need to put a limit on them because they all use the same\n// global completion queue which has a fixed capacity and if it overruns it will\n// enter an unrecoverable error state. 
This value is also set as the capacity of\n// the send queue of each queue pair.\nconstexpr uint32_t kNumPendingWriteReqs = 1024;\n\n// How many send requests (used by the receiver to acknowledge the RDMA writes\n// from the sender) can be pending at the same time across all connections.\nconstexpr uint32_t kNumPendingAckReqs = 1024;\n\n// How many elements the completion queue should be able to hold. These elements\n// will be either the completed receive requests of the SRQ, or the completed\n// send requests from a connection's queue pair. We can bound the former value\n// but not the latter, so we try to add some margin.\nconstexpr int kCompletionQueueSize =\n    kNumPendingRecvReqs + kNumPendingWriteReqs + kNumPendingAckReqs;\n\n// How many pending outgoing work requests each send queue should be able to\n// hold. The operations we post on a send queue are the RDMA_WRITEs to send\n// outgoing data and the SENDs to acknowledge incoming data, hence we size the\n// queue to the sum of the maximum amount of these two ops.\nconstexpr int kSendQueueSize = kNumPendingWriteReqs + kNumPendingAckReqs;\n\n// How many work completions to poll from the completion queue at each reactor\n// iteration.\nconstexpr int kNumPolledWorkCompletions = 32;\n\n// When the connection gets closed, to avoid leaks, it needs to \"reclaim\" all\n// the work requests that it had posted, by waiting for their completion. They\n// may however complete with error, which makes it harder to identify and\n// distinguish them from failing incoming requests because, in principle, we\n// cannot access the opcode field of a failed work completion. Therefore, we\n// assign a special ID to those types of requests, to match them later on.\nconstexpr uint64_t kWriteRequestId = 1;\nconstexpr uint64_t kAckRequestId = 2;\n\n} // namespace\n"
  },
  {
    "path": "tensorpipe/transport/ibv/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/context_impl.h>\n\n#include <tensorpipe/transport/ibv/connection_impl.h>\n#include <tensorpipe/transport/ibv/listener_impl.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nnamespace {\n\n// Prepend descriptor with transport name so it's easy to\n// disambiguate descriptors when debugging.\nconst std::string kDomainDescriptorPrefix{\"ibv:\"};\n\nstd::string generateDomainDescriptor() {\n  // It would be very cool if we could somehow obtain an \"identifier\" for the\n  // InfiniBand subnet that our device belongs to, but nothing of that sort\n  // seems to be available. So instead we say that if the user is trying to\n  // connect two processes which both have access to an InfiniBand device then\n  // they must know what they are doing and probably must have set up things\n  // properly.\n  return kDomainDescriptorPrefix + \"*\";\n}\n\n} // namespace\n\nstd::shared_ptr<ContextImpl> ContextImpl::create() {\n  Error error;\n  IbvLib ibvLib;\n  std::tie(error, ibvLib) = IbvLib::create();\n  if (error) {\n    TP_VLOG(7)\n        << \"IBV transport is not viable because libibverbs couldn't be loaded: \"\n        << error.what();\n    return nullptr;\n  }\n\n  IbvDeviceList deviceList;\n  std::tie(error, deviceList) = IbvDeviceList::create(ibvLib);\n  if (error && error.isOfType<SystemError>() &&\n      error.castToType<SystemError>()->errorCode() == ENOSYS) {\n    TP_VLOG(7) << \"IBV transport is not viable because it couldn't get list of \"\n               << \"InfiniBand devices because the kernel module isn't loaded\";\n    return nullptr;\n  }\n  TP_THROW_ASSERT_IF(error)\n      << \"Couldn't get list of InfiniBand devices: \" << error.what();\n\n  if (deviceList.size() 
== 0) {\n    TP_VLOG(7) << \"IBV transport is not viable because it couldn't find any \"\n               << \"InfiniBand NICs\";\n    return nullptr;\n  }\n\n  return std::make_shared<ContextImpl>(\n      std::move(ibvLib), std::move(deviceList));\n}\n\nContextImpl::ContextImpl(IbvLib ibvLib, IbvDeviceList deviceList)\n    : ContextImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          generateDomainDescriptor()),\n      reactor_(std::move(ibvLib), std::move(deviceList)) {}\n\nvoid ContextImpl::handleErrorImpl() {\n  loop_.close();\n  reactor_.close();\n}\n\nvoid ContextImpl::joinImpl() {\n  loop_.join();\n  reactor_.join();\n}\n\nbool ContextImpl::inLoop() const {\n  return reactor_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  reactor_.deferToLoop(std::move(fn));\n};\n\nvoid ContextImpl::registerDescriptor(\n    int fd,\n    int events,\n    std::shared_ptr<EpollLoop::EventHandler> h) {\n  loop_.registerDescriptor(fd, events, std::move(h));\n}\n\nvoid ContextImpl::unregisterDescriptor(int fd) {\n  loop_.unregisterDescriptor(fd);\n}\n\nReactor& ContextImpl::getReactor() {\n  return reactor_;\n}\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <string>\n#include <tuple>\n\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/transport/context_impl_boilerplate.h>\n#include <tensorpipe/transport/ibv/reactor.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nclass ConnectionImpl;\nclass ListenerImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create();\n\n  ContextImpl(IbvLib ibvLib, IbvDeviceList deviceList);\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n  void registerDescriptor(\n      int fd,\n      int events,\n      std::shared_ptr<EpollLoop::EventHandler> h);\n\n  void unregisterDescriptor(int fd);\n\n  Reactor& getReactor();\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n\n private:\n  Reactor reactor_;\n  EpollLoop loop_{this->reactor_};\n};\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/error.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/error.h>\n\n#include <netdb.h>\n\n#include <sstream>\n\n#include <tensorpipe/common/ibv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nstd::string IbvError::what() const {\n  return error_;\n}\n\nstd::string GetaddrinfoError::what() const {\n  std::ostringstream ss;\n  ss << \"getaddrinfo: \" << gai_strerror(error_);\n  return ss.str();\n}\n\nstd::string NoAddrFoundError::what() const {\n  return \"no address found\";\n}\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/error.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n\n#include <tensorpipe/transport/error.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nclass IbvError final : public BaseError {\n public:\n  explicit IbvError(std::string error) : error_(error) {}\n\n  std::string what() const override;\n\n private:\n  std::string error_;\n};\n\nclass GetaddrinfoError final : public BaseError {\n public:\n  explicit GetaddrinfoError(int error) : error_(error) {}\n\n  std::string what() const override;\n\n private:\n  int error_;\n};\n\nclass NoAddrFoundError final : public BaseError {\n public:\n  NoAddrFoundError() {}\n\n  std::string what() const override;\n};\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/factory.h>\n\n#include <tensorpipe/transport/context_boilerplate.h>\n#include <tensorpipe/transport/ibv/connection_impl.h>\n#include <tensorpipe/transport/ibv/context_impl.h>\n#include <tensorpipe/transport/ibv/listener_impl.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nstd::shared_ptr<Context> create() {\n  return std::make_shared<\n      ContextBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>>();\n}\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nstd::shared_ptr<Context> create();\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/listener_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/listener_impl.h>\n\n#include <deque>\n#include <functional>\n#include <mutex>\n#include <vector>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/transport/error.h>\n#include <tensorpipe/transport/ibv/connection_impl.h>\n#include <tensorpipe/transport/ibv/context_impl.h>\n#include <tensorpipe/transport/ibv/sockaddr.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nListenerImpl::ListenerImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::string addr)\n    : ListenerImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      sockaddr_(Sockaddr::createInetSockAddr(addr)) {}\n\nvoid ListenerImpl::initImplFromLoop() {\n  context_->enroll(*this);\n\n  Error error;\n  TP_DCHECK(!socket_.hasValue());\n  std::tie(error, socket_) =\n      Socket::createForFamily(sockaddr_.addr()->sa_family);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  error = socket_.reuseAddr(true);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  error = socket_.bind(sockaddr_);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  error = socket_.block(false);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  error = socket_.listen(128);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  struct sockaddr_storage addr;\n  socklen_t addrlen;\n  std::tie(error, addr, addrlen) = socket_.getSockName();\n  if (error) {\n    
setError(std::move(error));\n    return;\n  }\n  sockaddr_ = Sockaddr(reinterpret_cast<struct sockaddr*>(&addr), addrlen);\n}\n\nvoid ListenerImpl::handleErrorImpl() {\n  if (!fns_.empty()) {\n    context_->unregisterDescriptor(socket_.fd());\n  }\n  socket_.reset();\n  for (auto& fn : fns_) {\n    fn(error_, std::shared_ptr<Connection>());\n  }\n  fns_.clear();\n\n  context_->unenroll(*this);\n}\n\nvoid ListenerImpl::acceptImplFromLoop(accept_callback_fn fn) {\n  fns_.push_back(std::move(fn));\n\n  // Only register if we go from 0 to 1 pending callbacks. In other cases we\n  // already had a pending callback and thus we were already registered.\n  if (fns_.size() == 1) {\n    // Register with loop for readability events.\n    context_->registerDescriptor(socket_.fd(), EPOLLIN, shared_from_this());\n  }\n}\n\nstd::string ListenerImpl::addrImplFromLoop() const {\n  return sockaddr_.str();\n}\n\nvoid ListenerImpl::handleEventsFromLoop(int events) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Listener \" << id_ << \" is handling an event on its socket (\"\n             << EpollLoop::formatEpollEvents(events) << \")\";\n\n  if (events & EPOLLERR) {\n    int error;\n    socklen_t errorlen = sizeof(error);\n    int rv = getsockopt(\n        socket_.fd(),\n        SOL_SOCKET,\n        SO_ERROR,\n        reinterpret_cast<void*>(&error),\n        &errorlen);\n    if (rv == -1) {\n      setError(TP_CREATE_ERROR(SystemError, \"getsockopt\", rv));\n    } else {\n      setError(TP_CREATE_ERROR(SystemError, \"async error on socket\", error));\n    }\n    return;\n  }\n  if (events & EPOLLHUP) {\n    setError(TP_CREATE_ERROR(EOFError));\n    return;\n  }\n  TP_ARG_CHECK_EQ(events, EPOLLIN);\n\n  Error error;\n  Socket socket;\n  std::tie(error, socket) = socket_.accept();\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n\n  TP_DCHECK(!fns_.empty())\n      << \"when the callback is disarmed the listener's descriptor is supposed \"\n      << \"to be 
unregistered\";\n  auto fn = std::move(fns_.front());\n  fns_.pop_front();\n  if (fns_.empty()) {\n    context_->unregisterDescriptor(socket_.fd());\n  }\n  fn(Error::kSuccess, createAndInitConnection(std::move(socket)));\n}\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/listener_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <deque>\n#include <functional>\n#include <mutex>\n#include <vector>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/transport/error.h>\n#include <tensorpipe/transport/ibv/sockaddr.h>\n#include <tensorpipe/transport/listener_impl_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nclass ConnectionImpl;\nclass ContextImpl;\n\nclass ListenerImpl final\n    : public ListenerImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>,\n      public EpollLoop::EventHandler {\n public:\n  // Create a listener that listens on the specified address.\n  ListenerImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::string addr);\n\n  // Implementation of EventHandler.\n  void handleEventsFromLoop(int events) override;\n\n protected:\n  // Implement the entry points called by ListenerImplBoilerplate.\n  void initImplFromLoop() override;\n  void acceptImplFromLoop(accept_callback_fn fn) override;\n  std::string addrImplFromLoop() const override;\n  void handleErrorImpl() override;\n\n private:\n  Socket socket_;\n  Sockaddr sockaddr_;\n  std::deque<accept_callback_fn> fns_;\n};\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/reactor.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/reactor.h>\n\n#include <tensorpipe/common/system.h>\n#include <tensorpipe/transport/ibv/constants.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nReactor::Reactor(IbvLib ibvLib, IbvDeviceList deviceList)\n    : ibvLib_(std::move(ibvLib)) {\n  TP_DCHECK_GE(deviceList.size(), 1);\n  ctx_ = createIbvContext(getIbvLib(), deviceList[0]);\n  pd_ = createIbvProtectionDomain(getIbvLib(), ctx_);\n  cq_ = createIbvCompletionQueue(\n      getIbvLib(),\n      ctx_,\n      kCompletionQueueSize,\n      /*cq_context=*/nullptr,\n      /*channel=*/nullptr,\n      /*comp_vector=*/0);\n\n  IbvLib::srq_init_attr srqInitAttr;\n  std::memset(&srqInitAttr, 0, sizeof(srqInitAttr));\n  srqInitAttr.attr.max_wr = kNumPendingRecvReqs;\n  srq_ = createIbvSharedReceiveQueue(getIbvLib(), pd_, srqInitAttr);\n\n  addr_ = makeIbvAddress(getIbvLib(), ctx_, kPortNum, kGlobalIdentifierIndex);\n\n  postRecvRequestsOnSRQ(kNumPendingRecvReqs);\n\n  startThread(\"TP_IBV_reactor\");\n}\n\nvoid Reactor::postRecvRequestsOnSRQ(int num) {\n  while (num > 0) {\n    IbvLib::recv_wr* badRecvWr = nullptr;\n    std::array<IbvLib::recv_wr, kNumPolledWorkCompletions> wrs;\n    std::memset(wrs.data(), 0, sizeof(wrs));\n    for (int i = 0; i < std::min(num, kNumPolledWorkCompletions) - 1; i++) {\n      wrs[i].next = &wrs[i + 1];\n    }\n    int rv = getIbvLib().post_srq_recv(srq_.get(), wrs.data(), &badRecvWr);\n    TP_THROW_SYSTEM_IF(rv != 0, errno);\n    TP_THROW_ASSERT_IF(badRecvWr != nullptr);\n    num -= std::min(num, kNumPolledWorkCompletions);\n  }\n}\n\nvoid Reactor::setId(std::string id) {\n  id_ = std::move(id);\n}\n\nvoid Reactor::close() {\n  if (!closed_.exchange(true)) {\n    stopBusyPolling();\n  
}\n}\n\nvoid Reactor::join() {\n  close();\n\n  if (!joined_.exchange(true)) {\n    joinThread();\n  }\n}\n\nReactor::~Reactor() {\n  join();\n}\n\nbool Reactor::pollOnce() {\n  std::array<IbvLib::wc, kNumPolledWorkCompletions> wcs;\n  auto rv = getIbvLib().poll_cq(cq_.get(), wcs.size(), wcs.data());\n\n  if (rv == 0) {\n    return false;\n  }\n  TP_THROW_SYSTEM_IF(rv < 0, errno);\n\n  int numRecvs = 0;\n  int numWrites = 0;\n  int numAcks = 0;\n  for (int wcIdx = 0; wcIdx < rv; wcIdx++) {\n    IbvLib::wc& wc = wcs[wcIdx];\n\n    TP_VLOG(9) << \"Transport context \" << id_\n               << \" got work completion for request \" << wc.wr_id << \" for QP \"\n               << wc.qp_num << \" with status \"\n               << getIbvLib().wc_status_str(wc.status) << \" and opcode \"\n               << ibvWorkCompletionOpcodeToStr(wc.opcode)\n               << \" (byte length: \" << wc.byte_len\n               << \", immediate data: \" << wc.imm_data << \")\";\n\n    auto iter = queuePairEventHandler_.find(wc.qp_num);\n    TP_THROW_ASSERT_IF(iter == queuePairEventHandler_.end())\n        << \"Got work completion for unknown queue pair \" << wc.qp_num;\n\n    if (wc.status != IbvLib::WC_SUCCESS) {\n      iter->second->onError(wc.status, wc.wr_id);\n      continue;\n    }\n\n    switch (wc.opcode) {\n      case IbvLib::WC_RECV_RDMA_WITH_IMM:\n        TP_THROW_ASSERT_IF(!(wc.wc_flags & IbvLib::WC_WITH_IMM));\n        iter->second->onRemoteProducedData(wc.imm_data);\n        numRecvs++;\n        break;\n      case IbvLib::WC_RECV:\n        TP_THROW_ASSERT_IF(!(wc.wc_flags & IbvLib::WC_WITH_IMM));\n        iter->second->onRemoteConsumedData(wc.imm_data);\n        numRecvs++;\n        break;\n      case IbvLib::WC_RDMA_WRITE:\n        iter->second->onWriteCompleted();\n        numWrites++;\n        break;\n      case IbvLib::WC_SEND:\n        iter->second->onAckCompleted();\n        numAcks++;\n        break;\n      default:\n        TP_THROW_ASSERT() << \"Unknown opcode: \" 
<< wc.opcode;\n    }\n  }\n\n  postRecvRequestsOnSRQ(numRecvs);\n\n  numAvailableWrites_ += numWrites;\n  while (!pendingQpWrites_.empty() && numAvailableWrites_ > 0) {\n    postWrite(\n        std::get<0>(pendingQpWrites_.front()),\n        std::get<1>(pendingQpWrites_.front()));\n    pendingQpWrites_.pop_front();\n  }\n\n  numAvailableAcks_ += numAcks;\n  while (!pendingQpAcks_.empty() && numAvailableAcks_ > 0) {\n    postAck(\n        std::get<0>(pendingQpAcks_.front()),\n        std::get<1>(pendingQpAcks_.front()));\n    pendingQpAcks_.pop_front();\n  }\n\n  return true;\n}\n\nbool Reactor::readyToClose() {\n  return queuePairEventHandler_.size() == 0;\n}\n\nvoid Reactor::registerQp(\n    uint32_t qpn,\n    std::shared_ptr<IbvEventHandler> eventHandler) {\n  queuePairEventHandler_.emplace(qpn, std::move(eventHandler));\n}\n\nvoid Reactor::unregisterQp(uint32_t qpn) {\n  queuePairEventHandler_.erase(qpn);\n}\n\nvoid Reactor::postWrite(IbvQueuePair& qp, WriteInfo info) {\n  if (numAvailableWrites_ > 0) {\n    IbvLib::sge list;\n    list.addr = reinterpret_cast<uint64_t>(info.addr);\n    list.length = info.length;\n    list.lkey = info.lkey;\n\n    IbvLib::send_wr wr;\n    std::memset(&wr, 0, sizeof(wr));\n    wr.wr_id = kWriteRequestId;\n    wr.sg_list = &list;\n    wr.num_sge = 1;\n    wr.opcode = IbvLib::WR_RDMA_WRITE_WITH_IMM;\n    wr.imm_data = info.length;\n    wr.wr.rdma.remote_addr = info.remoteAddr;\n    wr.wr.rdma.rkey = info.rkey;\n\n    IbvLib::send_wr* badWr = nullptr;\n    TP_VLOG(9) << \"Transport context \" << id_ << \" posting RDMA write for QP \"\n               << qp->qp_num;\n    TP_CHECK_IBV_INT(getIbvLib().post_send(qp.get(), &wr, &badWr));\n    TP_THROW_ASSERT_IF(badWr != nullptr);\n    numAvailableWrites_--;\n  } else {\n    TP_VLOG(9) << \"Transport context \" << id_\n               << \" queueing up RDMA write for QP \" << qp->qp_num;\n    pendingQpWrites_.emplace_back(qp, info);\n  }\n}\n\nvoid Reactor::postAck(IbvQueuePair& qp, AckInfo 
info) {\n  if (numAvailableAcks_ > 0) {\n    IbvLib::send_wr wr;\n    std::memset(&wr, 0, sizeof(wr));\n    wr.wr_id = kAckRequestId;\n    wr.opcode = IbvLib::WR_SEND_WITH_IMM;\n    wr.imm_data = info.length;\n\n    IbvLib::send_wr* badWr = nullptr;\n    TP_VLOG(9) << \"Transport context \" << id_ << \" posting send for QP \"\n               << qp->qp_num;\n    TP_CHECK_IBV_INT(getIbvLib().post_send(qp.get(), &wr, &badWr));\n    TP_THROW_ASSERT_IF(badWr != nullptr);\n    numAvailableAcks_--;\n  } else {\n    TP_VLOG(9) << \"Transport context \" << id_ << \" queueing send for QP \"\n               << qp->qp_num;\n    pendingQpAcks_.emplace_back(qp, info);\n  }\n}\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/reactor.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <functional>\n#include <future>\n#include <list>\n#include <mutex>\n#include <set>\n#include <thread>\n#include <vector>\n\n#include <tensorpipe/common/busy_polling_loop.h>\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/fd.h>\n#include <tensorpipe/common/ibv.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/transport/ibv/constants.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nclass IbvEventHandler {\n public:\n  virtual void onRemoteProducedData(uint32_t length) = 0;\n\n  virtual void onRemoteConsumedData(uint32_t length) = 0;\n\n  virtual void onWriteCompleted() = 0;\n\n  virtual void onAckCompleted() = 0;\n\n  virtual void onError(IbvLib::wc_status status, uint64_t wrId) = 0;\n\n  virtual ~IbvEventHandler() = default;\n};\n\n// Reactor loop.\n//\n// Companion class to the event loop in `loop.h` that executes\n// functions on triggers. The triggers are posted to a shared memory\n// ring buffer, so this can be done by other processes on the same\n// machine. 
It uses extra data in the ring buffer header to store a\n// mutex and condition variable to avoid a busy loop.\n//\nclass Reactor final : public BusyPollingLoop {\n public:\n  Reactor(IbvLib ibvLib, IbvDeviceList deviceList);\n\n  const IbvLib& getIbvLib() {\n    return ibvLib_;\n  }\n\n  IbvProtectionDomain& getIbvPd() {\n    return pd_;\n  }\n\n  IbvCompletionQueue& getIbvCq() {\n    return cq_;\n  }\n\n  IbvSharedReceiveQueue& getIbvSrq() {\n    return srq_;\n  }\n\n  const IbvAddress& getIbvAddress() {\n    return addr_;\n  }\n\n  void registerQp(uint32_t qpn, std::shared_ptr<IbvEventHandler> eventHandler);\n\n  void unregisterQp(uint32_t qpn);\n\n  struct WriteInfo {\n    void* addr;\n    size_t length;\n    uint32_t lkey;\n    uint64_t remoteAddr;\n    uint32_t rkey;\n  };\n\n  void postWrite(IbvQueuePair& qp, WriteInfo info);\n\n  struct AckInfo {\n    size_t length;\n  };\n\n  void postAck(IbvQueuePair& qp, AckInfo info);\n\n  void setId(std::string id);\n\n  void close();\n\n  void join();\n\n  ~Reactor();\n\n protected:\n  bool pollOnce() override;\n\n  bool readyToClose() override;\n\n private:\n  // InfiniBand stuff\n  const IbvLib ibvLib_;\n  IbvContext ctx_;\n  IbvProtectionDomain pd_;\n  IbvCompletionQueue cq_;\n  IbvSharedReceiveQueue srq_;\n  IbvAddress addr_;\n\n  void postRecvRequestsOnSRQ(int num);\n\n  std::atomic<bool> closed_{false};\n  std::atomic<bool> joined_{false};\n\n  // An identifier for the context, composed of the identifier for the context,\n  // combined with the transport's name. 
It will only be used for logging and\n  // debugging purposes.\n  std::string id_{\"N/A\"};\n\n  // The registered event handlers for each queue pair.\n  std::unordered_map<uint32_t, std::shared_ptr<IbvEventHandler>>\n      queuePairEventHandler_;\n\n  uint32_t numAvailableWrites_{kNumPendingWriteReqs};\n  uint32_t numAvailableAcks_{kNumPendingAckReqs};\n  std::deque<std::tuple<IbvQueuePair&, WriteInfo>> pendingQpWrites_;\n  std::deque<std::tuple<IbvQueuePair&, AckInfo>> pendingQpAcks_;\n};\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/sockaddr.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/sockaddr.h>\n\n#include <array>\n#include <cstring>\n#include <sstream>\n#include <utility>\n\n#include <arpa/inet.h>\n#include <net/if.h>\n\n#include <tensorpipe/common/defs.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nSockaddr Sockaddr::createInetSockAddr(const std::string& str) {\n  int port = 0;\n  std::string addrStr;\n  std::string portStr;\n\n  // If the input string is an IPv6 address with port, the address\n  // itself must be wrapped with brackets.\n  if (addrStr.empty()) {\n    auto start = str.find(\"[\");\n    auto stop = str.find(\"]\");\n    if (start < stop && start != std::string::npos &&\n        stop != std::string::npos) {\n      addrStr = str.substr(start + 1, stop - (start + 1));\n      if (stop + 1 < str.size() && str[stop + 1] == ':') {\n        portStr = str.substr(stop + 2);\n      }\n    }\n  }\n\n  // If the input string is an IPv4 address with port, we expect\n  // at least a single period and a single colon in the string.\n  if (addrStr.empty()) {\n    auto period = str.find(\".\");\n    auto colon = str.find(\":\");\n    if (period != std::string::npos && colon != std::string::npos) {\n      addrStr = str.substr(0, colon);\n      portStr = str.substr(colon + 1);\n    }\n  }\n\n  // Fallback to using entire input string as address without port.\n  if (addrStr.empty()) {\n    addrStr = str;\n  }\n\n  // Parse port number if specified.\n  if (!portStr.empty()) {\n    port = std::stoi(portStr);\n    if (port < 0 || port > std::numeric_limits<uint16_t>::max()) {\n      TP_THROW_EINVAL() << str;\n    }\n  }\n\n  // Try to convert an IPv4 address.\n  {\n    struct sockaddr_in addr;\n    std::memset(&addr, 0, sizeof(addr));\n    auto rv = 
inet_pton(AF_INET, addrStr.c_str(), &addr.sin_addr);\n    TP_THROW_SYSTEM_IF(rv < 0, errno);\n    if (rv == 1) {\n      addr.sin_family = AF_INET;\n      addr.sin_port = ntohs(port);\n      return Sockaddr(reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr));\n    }\n  }\n\n  // Try to convert an IPv6 address.\n  {\n    struct sockaddr_in6 addr;\n    std::memset(&addr, 0, sizeof(addr));\n\n    auto interfacePos = addrStr.find('%');\n    if (interfacePos != std::string::npos) {\n      addr.sin6_scope_id =\n          if_nametoindex(addrStr.substr(interfacePos + 1).c_str());\n      addrStr = addrStr.substr(0, interfacePos);\n    }\n\n    auto rv = inet_pton(AF_INET6, addrStr.c_str(), &addr.sin6_addr);\n    TP_THROW_SYSTEM_IF(rv < 0, errno);\n    if (rv == 1) {\n      addr.sin6_family = AF_INET6;\n      addr.sin6_port = ntohs(port);\n      return Sockaddr(reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr));\n    }\n  }\n\n  // Invalid address.\n  TP_THROW_EINVAL() << str;\n\n  // Return bogus to silence \"return from non-void function\" warning.\n  // Note: we don't reach this point per the throw above.\n  return Sockaddr(nullptr, 0);\n}\n\nstd::string Sockaddr::str() const {\n  std::ostringstream oss;\n\n  if (addr_.ss_family == AF_INET) {\n    std::array<char, 64> buf;\n    auto in = reinterpret_cast<const struct sockaddr_in*>(&addr_);\n    auto rv = inet_ntop(AF_INET, &in->sin_addr, buf.data(), buf.size());\n    TP_THROW_SYSTEM_IF(rv == nullptr, errno);\n    oss << buf.data() << \":\" << htons(in->sin_port);\n  } else if (addr_.ss_family == AF_INET6) {\n    std::array<char, 64> buf;\n    auto in6 = reinterpret_cast<const struct sockaddr_in6*>(&addr_);\n    auto rv = inet_ntop(AF_INET6, &in6->sin6_addr, buf.data(), buf.size());\n    TP_THROW_SYSTEM_IF(rv == nullptr, errno);\n    oss << \"[\" << buf.data();\n    if (in6->sin6_scope_id > 0) {\n      std::array<char, IF_NAMESIZE> scopeBuf;\n      rv = if_indextoname(in6->sin6_scope_id, scopeBuf.data());\n      
TP_THROW_SYSTEM_IF(rv == nullptr, errno);\n      oss << \"%\" << scopeBuf.data();\n    }\n    oss << \"]:\" << htons(in6->sin6_port);\n\n  } else {\n    TP_THROW_EINVAL() << \"invalid address family: \" << addr_.ss_family;\n  }\n\n  return oss.str();\n}\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/sockaddr.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sys/socket.h>\n\n#include <cstring>\n#include <string>\n\n#include <tensorpipe/common/socket.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nclass Sockaddr final : public tensorpipe::Sockaddr {\n public:\n  static Sockaddr createInetSockAddr(const std::string& str);\n\n  Sockaddr(const struct sockaddr* addr, socklen_t addrlen) {\n    TP_ARG_CHECK(addr != nullptr);\n    TP_ARG_CHECK_LE(addrlen, sizeof(addr_));\n    // Ensure the sockaddr_storage is zeroed, because we don't always\n    // write to all fields in the `sockaddr_[in|in6]` structures.\n    std::memset(&addr_, 0, sizeof(addr_));\n    std::memcpy(&addr_, addr, addrlen);\n    addrlen_ = addrlen;\n  }\n\n  inline const struct sockaddr* addr() const override {\n    return reinterpret_cast<const struct sockaddr*>(&addr_);\n  }\n\n  inline struct sockaddr* addr() {\n    return reinterpret_cast<struct sockaddr*>(&addr_);\n  }\n\n  inline socklen_t addrlen() const override {\n    return addrlen_;\n  }\n\n  std::string str() const;\n\n private:\n  struct sockaddr_storage addr_;\n  socklen_t addrlen_;\n};\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/utility.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/ibv/utility.h>\n\n#include <array>\n#include <climits>\n#include <cstring>\n#include <memory>\n#include <string>\n#include <tuple>\n#include <utility>\n\n#include <ifaddrs.h>\n#include <netdb.h>\n#include <netinet/in.h>\n#include <netinet/ip.h>\n#include <sys/socket.h>\n#include <sys/types.h>\n#include <unistd.h>\n\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/common/socket.h>\n#include <tensorpipe/transport/ibv/error.h>\n#include <tensorpipe/transport/ibv/sockaddr.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nnamespace {\n\nstruct InterfaceAddressesDeleter {\n  void operator()(struct ifaddrs* ptr) {\n    ::freeifaddrs(ptr);\n  }\n};\n\nusing InterfaceAddresses =\n    std::unique_ptr<struct ifaddrs, InterfaceAddressesDeleter>;\n\nstd::tuple<Error, InterfaceAddresses> createInterfaceAddresses() {\n  struct ifaddrs* ifaddrs;\n  auto rv = ::getifaddrs(&ifaddrs);\n  if (rv < 0) {\n    return std::make_tuple(\n        TP_CREATE_ERROR(SystemError, \"getifaddrs\", errno),\n        InterfaceAddresses());\n  }\n  return std::make_tuple(Error::kSuccess, InterfaceAddresses(ifaddrs));\n}\n\nstd::tuple<Error, std::string> getHostname() {\n  std::array<char, HOST_NAME_MAX> hostname;\n  auto rv = ::gethostname(hostname.data(), hostname.size());\n  if (rv < 0) {\n    return std::make_tuple(\n        TP_CREATE_ERROR(SystemError, \"gethostname\", errno), std::string());\n  }\n  return std::make_tuple(Error::kSuccess, std::string(hostname.data()));\n}\n\nstruct AddressInfoDeleter {\n  void operator()(struct addrinfo* ptr) {\n    ::freeaddrinfo(ptr);\n  }\n};\n\nusing AddressInfo = std::unique_ptr<struct addrinfo, AddressInfoDeleter>;\n\nstd::tuple<Error, AddressInfo> 
createAddressInfo(std::string host) {\n  struct addrinfo hints;\n  std::memset(&hints, 0, sizeof(hints));\n  hints.ai_family = AF_UNSPEC;\n  hints.ai_socktype = SOCK_STREAM;\n  hints.ai_protocol = IPPROTO_TCP;\n\n  struct addrinfo* result;\n  auto rv = ::getaddrinfo(host.c_str(), nullptr, &hints, &result);\n  if (rv != 0) {\n    return std::make_tuple(\n        TP_CREATE_ERROR(GetaddrinfoError, rv), AddressInfo());\n  }\n  return std::make_tuple(Error::kSuccess, AddressInfo(result));\n}\n\n} // namespace\n\nstd::tuple<Error, std::string> lookupAddrForIface(std::string iface) {\n  Error error;\n  InterfaceAddresses addresses;\n  std::tie(error, addresses) = createInterfaceAddresses();\n  if (error) {\n    return std::make_tuple(std::move(error), std::string());\n  }\n\n  struct ifaddrs* ifa;\n  for (ifa = addresses.get(); ifa != nullptr; ifa = ifa->ifa_next) {\n    // Skip entry if ifa_addr is NULL (see getifaddrs(3))\n    if (ifa->ifa_addr == nullptr) {\n      continue;\n    }\n\n    if (iface != ifa->ifa_name) {\n      continue;\n    }\n\n    switch (ifa->ifa_addr->sa_family) {\n      case AF_INET:\n        return std::make_tuple(\n            Error::kSuccess,\n            Sockaddr(ifa->ifa_addr, sizeof(struct sockaddr_in)).str());\n      case AF_INET6:\n        return std::make_tuple(\n            Error::kSuccess,\n            Sockaddr(ifa->ifa_addr, sizeof(struct sockaddr_in6)).str());\n    }\n  }\n\n  return std::make_tuple(TP_CREATE_ERROR(NoAddrFoundError), std::string());\n}\n\nstd::tuple<Error, std::string> lookupAddrForHostname() {\n  Error error;\n  std::string hostname;\n  std::tie(error, hostname) = getHostname();\n  if (error) {\n    return std::make_tuple(std::move(error), std::string());\n  }\n\n  AddressInfo info;\n  std::tie(error, info) = createAddressInfo(std::move(hostname));\n  if (error) {\n    return std::make_tuple(std::move(error), std::string());\n  }\n\n  Error firstError;\n  for (struct addrinfo* rp = info.get(); rp != nullptr; rp = 
rp->ai_next) {\n    TP_DCHECK(rp->ai_family == AF_INET || rp->ai_family == AF_INET6);\n    TP_DCHECK_EQ(rp->ai_socktype, SOCK_STREAM);\n    TP_DCHECK_EQ(rp->ai_protocol, IPPROTO_TCP);\n\n    Sockaddr addr = Sockaddr(rp->ai_addr, rp->ai_addrlen);\n\n    Socket socket;\n    std::tie(error, socket) = Socket::createForFamily(rp->ai_family);\n\n    if (!error) {\n      error = socket.bind(addr);\n    }\n\n    if (error) {\n      // Record the first binding error we encounter and return that in the end\n      // if no working address is found, in order to help with debugging.\n      if (!firstError) {\n        firstError = error;\n      }\n      continue;\n    }\n\n    return std::make_tuple(Error::kSuccess, addr.str());\n  }\n\n  if (firstError) {\n    return std::make_tuple(std::move(firstError), std::string());\n  } else {\n    return std::make_tuple(TP_CREATE_ERROR(NoAddrFoundError), std::string());\n  }\n}\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/ibv/utility.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n#include <tuple>\n\n#include <tensorpipe/common/error.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace ibv {\n\nstd::tuple<Error, std::string> lookupAddrForIface(std::string iface);\n\nstd::tuple<Error, std::string> lookupAddrForHostname();\n\n} // namespace ibv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/listener.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <string>\n\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\nclass Listener {\n public:\n  using accept_callback_fn = std::function<\n      void(const Error& error, std::shared_ptr<Connection> connection)>;\n\n  virtual void accept(accept_callback_fn fn) = 0;\n\n  // Return address that this listener is listening on.\n  // This may be required if the listening address is not known up\n  // front, or dynamically populated by the operating system (e.g. by\n  // letting the operating system pick a TCP port to listen on).\n  virtual std::string addr() const = 0;\n\n  // Tell the listener what its identifier is.\n  //\n  // This is only supposed to be called from the high-level listener or from\n  // channel contexts. It will only used for logging and debugging purposes.\n  virtual void setId(std::string id) = 0;\n\n  virtual void close() = 0;\n\n  virtual ~Listener() = default;\n};\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/listener_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n#include <type_traits>\n#include <utility>\n\n#include <tensorpipe/transport/listener.h>\n#include <tensorpipe/transport/listener_impl_boilerplate.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\ntemplate <typename TCtx, typename TList, typename TConn>\nclass ListenerBoilerplate : public Listener {\n public:\n  template <typename... Args>\n  ListenerBoilerplate(\n      typename ListenerImplBoilerplate<TCtx, TList, TConn>::ConstructorToken\n          token,\n      std::shared_ptr<TCtx> context,\n      std::string id,\n      Args... args);\n\n  explicit ListenerBoilerplate(std::shared_ptr<TList> listener);\n\n  ListenerBoilerplate(const ListenerBoilerplate&) = delete;\n  ListenerBoilerplate(ListenerBoilerplate&&) = delete;\n  ListenerBoilerplate& operator=(const ListenerBoilerplate&) = delete;\n  ListenerBoilerplate& operator=(ListenerBoilerplate&&) = delete;\n\n  // Queue a callback to be called when a connection comes in.\n  void accept(accept_callback_fn fn) override;\n\n  // Obtain the listener's address.\n  std::string addr() const override;\n\n  // Tell the listener what its identifier is.\n  void setId(std::string id) override;\n\n  // Shut down the connection and its resources.\n  void close() override;\n\n  ~ListenerBoilerplate() override;\n\n protected:\n  // Using a shared_ptr allows us to detach the lifetime of the implementation\n  // from the public object's one and perform the destruction asynchronously.\n  const std::shared_ptr<TList> impl_;\n};\n\ntemplate <typename TCtx, typename TList, typename TConn>\ntemplate <typename... 
Args>\nListenerBoilerplate<TCtx, TList, TConn>::ListenerBoilerplate(\n    typename ListenerImplBoilerplate<TCtx, TList, TConn>::ConstructorToken\n        token,\n    std::shared_ptr<TCtx> context,\n    std::string id,\n    Args... args)\n    : impl_(std::make_shared<TList>(\n          token,\n          std::move(context),\n          std::move(id),\n          std::forward<Args>(args)...)) {\n  static_assert(\n      std::is_base_of<ListenerImplBoilerplate<TCtx, TList, TConn>, TList>::\n          value,\n      \"\");\n  impl_->init();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nListenerBoilerplate<TCtx, TList, TConn>::ListenerBoilerplate(\n    std::shared_ptr<TList> listener)\n    : impl_(std::move(listener)) {\n  static_assert(\n      std::is_base_of<ListenerImplBoilerplate<TCtx, TList, TConn>, TList>::\n          value,\n      \"\");\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerBoilerplate<TCtx, TList, TConn>::accept(accept_callback_fn fn) {\n  if (unlikely(!impl_)) {\n    // FIXME In C++-17 perhaps a global static inline variable would be better?\n    static Error error = TP_CREATE_ERROR(ContextNotViableError);\n    fn(error, nullptr);\n    return;\n  }\n  impl_->accept(std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nstd::string ListenerBoilerplate<TCtx, TList, TConn>::addr() const {\n  if (unlikely(!impl_)) {\n    return \"\";\n  }\n  return impl_->addr();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerBoilerplate<TCtx, TList, TConn>::setId(std::string id) {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->setId(std::move(id));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerBoilerplate<TCtx, TList, TConn>::close() {\n  if (unlikely(!impl_)) {\n    return;\n  }\n  impl_->close();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nListenerBoilerplate<TCtx, TList, TConn>::~ListenerBoilerplate() {\n  
close();\n}\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/listener_impl_boilerplate.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <memory>\n#include <string>\n#include <utility>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/transport/connection_boilerplate.h>\n#include <tensorpipe/transport/error.h>\n#include <tensorpipe/transport/listener.h>\n\nnamespace tensorpipe {\nnamespace transport {\n\ntemplate <typename TCtx, typename TList, typename TConn>\nclass ListenerImplBoilerplate : public std::enable_shared_from_this<TList> {\n public:\n  class ConstructorToken {\n   public:\n    ConstructorToken(const ConstructorToken&) = default;\n\n   private:\n    explicit ConstructorToken() {}\n    friend ContextImplBoilerplate<TCtx, TList, TConn>;\n    friend ListenerImplBoilerplate<TCtx, TList, TConn>;\n  };\n\n  ListenerImplBoilerplate(\n      ConstructorToken token,\n      std::shared_ptr<TCtx> context,\n      std::string id);\n\n  ListenerImplBoilerplate(const ListenerImplBoilerplate&) = delete;\n  ListenerImplBoilerplate(ListenerImplBoilerplate&&) = delete;\n  ListenerImplBoilerplate& operator=(const ListenerImplBoilerplate&) = delete;\n  ListenerImplBoilerplate& operator=(ListenerImplBoilerplate&&) = delete;\n\n  // Initialize member fields that need `shared_from_this`.\n  void init();\n\n  // Queue a callback to be called when a connection comes in.\n  using accept_callback_fn = Listener::accept_callback_fn;\n  void accept(accept_callback_fn fn);\n\n  // Obtain the listener's address.\n  std::string addr() const;\n\n  // Tell the listener what its identifier is.\n  void setId(std::string id);\n\n  // Shut down the listener and its resources.\n  void close();\n\n  virtual 
~ListenerImplBoilerplate() = default;\n\n protected:\n  virtual void initImplFromLoop() = 0;\n  virtual void acceptImplFromLoop(accept_callback_fn fn) = 0;\n  virtual std::string addrImplFromLoop() const = 0;\n  virtual void handleErrorImpl() = 0;\n\n  void setError(Error error);\n\n  const std::shared_ptr<TCtx> context_;\n\n  Error error_{Error::kSuccess};\n\n  template <typename... Args>\n  std::shared_ptr<Connection> createAndInitConnection(Args&&... args);\n\n  // An identifier for the listener, composed of the identifier for the context,\n  // combined with an increasing sequence number. It will be used as a prefix\n  // for the identifiers of connections. All of them will only be used for\n  // logging and debugging purposes.\n  std::string id_;\n\n private:\n  // Initialize member fields that need `shared_from_this`.\n  void initFromLoop();\n\n  // Queue a callback to be called when a connection comes in.\n  void acceptFromLoop(accept_callback_fn fn);\n\n  // Obtain the listener's address.\n  std::string addrFromLoop() const;\n\n  void setIdFromLoop(std::string id);\n\n  // Shut down the connection and its resources.\n  void closeFromLoop();\n\n  // Deal with an error.\n  void handleError();\n\n  // A sequence number for the calls to accept.\n  uint64_t nextConnectionBeingAccepted_{0};\n\n  // Sequence numbers for the connections created by this listener, used to\n  // create their identifiers based off this listener's identifier. 
They will\n  // only be used for logging and debugging.\n  std::atomic<uint64_t> connectionCounter_{0};\n\n  // Contexts do sometimes need to call directly into closeFromLoop, in order to\n  // make sure that some of their operations can happen \"atomically\" on the\n  // connection, without possibly other operations occurring in between (e.g.,\n  // an error).\n  friend ContextImplBoilerplate<TCtx, TList, TConn>;\n};\n\ntemplate <typename TCtx, typename TList, typename TConn>\nListenerImplBoilerplate<TCtx, TList, TConn>::ListenerImplBoilerplate(\n    ConstructorToken /* unused */,\n    std::shared_ptr<TCtx> context,\n    std::string id)\n    : context_(std::move(context)), id_(std::move(id)) {}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::init() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->initFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::initFromLoop() {\n  if (context_->closed()) {\n    // Set the error without calling setError because we do not want to invoke\n    // the subclass's handleErrorImpl as it would find itself in a weird state\n    // (since initFromLoop wouldn't have been called).\n    error_ = TP_CREATE_ERROR(ListenerClosedError);\n    TP_VLOG(7) << \"Listener \" << id_ << \" is closing (without initing)\";\n    return;\n  }\n\n  initImplFromLoop();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::accept(\n    accept_callback_fn fn) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, fn{std::move(fn)}]() mutable {\n        impl->acceptFromLoop(std::move(fn));\n      });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::acceptFromLoop(\n    accept_callback_fn fn) {\n  TP_DCHECK(context_->inLoop());\n\n  uint64_t sequenceNumber 
= nextConnectionBeingAccepted_++;\n  TP_VLOG(7) << \"Listener \" << id_ << \" received an accept request (#\"\n             << sequenceNumber << \")\";\n\n  fn = [this, sequenceNumber, fn{std::move(fn)}](\n           const Error& error, std::shared_ptr<Connection> connection) {\n    TP_VLOG(7) << \"Listener \" << id_ << \" is calling an accept callback (#\"\n               << sequenceNumber << \")\";\n    fn(error, std::move(connection));\n    TP_VLOG(7) << \"Listener \" << id_ << \" done calling an accept callback (#\"\n               << sequenceNumber << \")\";\n  };\n\n  if (error_) {\n    fn(error_, std::shared_ptr<Connection>());\n    return;\n  }\n\n  acceptImplFromLoop(std::move(fn));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nstd::string ListenerImplBoilerplate<TCtx, TList, TConn>::addr() const {\n  std::string addr;\n  context_->runInLoop([this, &addr]() { addr = addrFromLoop(); });\n  return addr;\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nstd::string ListenerImplBoilerplate<TCtx, TList, TConn>::addrFromLoop() const {\n  TP_DCHECK(context_->inLoop());\n\n  return addrImplFromLoop();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\ntemplate <typename... Args>\nstd::shared_ptr<Connection> ListenerImplBoilerplate<TCtx, TList, TConn>::\n    createAndInitConnection(Args&&... 
args) {\n  TP_DCHECK(context_->inLoop());\n  std::string connectionId = id_ + \".c\" + std::to_string(connectionCounter_++);\n  TP_VLOG(7) << \"Listener \" << id_ << \" is opening connection \" << connectionId;\n  auto connection = std::make_shared<TConn>(\n      typename ConnectionImplBoilerplate<TCtx, TList, TConn>::\n          ConstructorToken(),\n      context_,\n      std::move(connectionId),\n      std::forward<Args>(args)...);\n  // We initialize the connection from the loop immediately, inline, because the\n  // initialization of a connection accepted by a listener typically happens\n  // partly in the listener (e.g., opening and accepting the socket) and partly\n  // in the connection's initFromLoop, and we need these two steps to happen\n  // \"atomically\" to make it impossible for an error to occur in between.\n  connection->initFromLoop();\n  return std::make_shared<ConnectionBoilerplate<TCtx, TList, TConn>>(\n      std::move(connection));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::setId(std::string id) {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}, id{std::move(id)}]() mutable {\n        impl->setIdFromLoop(std::move(id));\n      });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::setIdFromLoop(\n    std::string id) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(7) << \"Listener \" << id_ << \" was renamed to \" << id;\n  id_ = std::move(id);\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::close() {\n  context_->deferToLoop(\n      [impl{this->shared_from_this()}]() { impl->closeFromLoop(); });\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::closeFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(7) << \"Listener \" << id_ << \" is closing\";\n  
setError(TP_CREATE_ERROR(ListenerClosedError));\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::setError(Error error) {\n  // Don't overwrite an error that's already set.\n  if (error_ || !error) {\n    return;\n  }\n\n  error_ = std::move(error);\n\n  handleError();\n}\n\ntemplate <typename TCtx, typename TList, typename TConn>\nvoid ListenerImplBoilerplate<TCtx, TList, TConn>::handleError() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(8) << \"Listener \" << id_ << \" is handling error \" << error_.what();\n\n  handleErrorImpl();\n}\n\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/connection_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/shm/connection_impl.h>\n\n#include <string.h>\n\n#include <deque>\n#include <vector>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/common/ringbuffer_read_write_ops.h>\n#include <tensorpipe/common/ringbuffer_role.h>\n#include <tensorpipe/common/shm_ringbuffer.h>\n#include <tensorpipe/transport/error.h>\n#include <tensorpipe/transport/shm/context_impl.h>\n#include <tensorpipe/transport/shm/reactor.h>\n#include <tensorpipe/transport/shm/sockaddr.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nConnectionImpl::ConnectionImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    Socket socket)\n    : ConnectionImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      socket_(std::move(socket)) {}\n\nConnectionImpl::ConnectionImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::string addr)\n    : ConnectionImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      sockaddr_(Sockaddr::createAbstractUnixAddr(addr)) {}\n\nvoid ConnectionImpl::initImplFromLoop() {\n  context_->enroll(*this);\n\n  Error error;\n  // The connection either got a socket or an address, but not both.\n  TP_DCHECK(socket_.hasValue() ^ sockaddr_.has_value());\n  if (!socket_.hasValue()) {\n    std::tie(error, socket_) = Socket::createForFamily(AF_UNIX);\n    if (error) {\n      
setError(std::move(error));\n      return;\n    }\n    error = socket_.connect(sockaddr_.value());\n    if (error) {\n      setError(std::move(error));\n      return;\n    }\n  }\n  // Ensure underlying control socket is non-blocking such that it\n  // works well with event driven I/O.\n  error = socket_.block(false);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n\n  // Create ringbuffer for inbox.\n  std::tie(error, inboxHeaderSegment_, inboxDataSegment_, inboxRb_) =\n      createShmRingBuffer<kNumRingbufferRoles>(kBufferSize);\n  TP_THROW_ASSERT_IF(error)\n      << \"Couldn't allocate ringbuffer for connection inbox: \" << error.what();\n\n  // Register method to be called when our peer writes to our inbox.\n  inboxReactorToken_ = context_->addReaction([this]() {\n    TP_VLOG(9) << \"Connection \" << id_\n               << \" is reacting to the peer writing to the inbox\";\n    processReadOperationsFromLoop();\n  });\n\n  // Register method to be called when our peer reads from our outbox.\n  outboxReactorToken_ = context_->addReaction([this]() {\n    TP_VLOG(9) << \"Connection \" << id_\n               << \" is reacting to the peer reading from the outbox\";\n    processWriteOperationsFromLoop();\n  });\n\n  // We're sending file descriptors first, so wait for writability.\n  state_ = SEND_FDS;\n  context_->registerDescriptor(socket_.fd(), EPOLLOUT, shared_from_this());\n}\n\nvoid ConnectionImpl::readImplFromLoop(read_callback_fn fn) {\n  readOperations_.emplace_back(std::move(fn));\n\n  // If the inbox already contains some data, we may be able to process this\n  // operation right away.\n  processReadOperationsFromLoop();\n}\n\nvoid ConnectionImpl::readImplFromLoop(\n    AbstractNopHolder& object,\n    read_nop_callback_fn fn) {\n  readOperations_.emplace_back(\n      &object,\n      [fn{std::move(fn)}](\n          const Error& error, const void* /* unused */, size_t /* unused */) {\n        fn(error);\n      });\n\n  // If the inbox 
already contains some data, we may be able to process this\n  // operation right away.\n  processReadOperationsFromLoop();\n}\n\nvoid ConnectionImpl::readImplFromLoop(\n    void* ptr,\n    size_t length,\n    read_callback_fn fn) {\n  readOperations_.emplace_back(ptr, length, std::move(fn));\n\n  // If the inbox already contains some data, we may be able to process this\n  // operation right away.\n  processReadOperationsFromLoop();\n}\n\nvoid ConnectionImpl::writeImplFromLoop(\n    const void* ptr,\n    size_t length,\n    write_callback_fn fn) {\n  writeOperations_.emplace_back(ptr, length, std::move(fn));\n\n  // If the outbox has some free space, we may be able to process this operation\n  // right away.\n  processWriteOperationsFromLoop();\n}\n\nvoid ConnectionImpl::writeImplFromLoop(\n    const AbstractNopHolder& object,\n    write_callback_fn fn) {\n  writeOperations_.emplace_back(&object, std::move(fn));\n\n  // If the outbox has some free space, we may be able to process this operation\n  // right away.\n  processWriteOperationsFromLoop();\n}\n\nvoid ConnectionImpl::handleEventsFromLoop(int events) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_ << \" is handling an event on its socket (\"\n             << EpollLoop::formatEpollEvents(events) << \")\";\n\n  // Handle only one of the events in the mask. Events on the control\n  // file descriptor are rare enough for the cost of having epoll call\n  // into this function multiple times to not matter. The benefit is\n  // that every handler can close and unregister the control file\n  // descriptor from the event loop, without worrying about the next\n  // handler trying to do so as well.\n  // In some cases the socket could be in a state where it's both in an error\n  // state and readable/writable. If we checked for EPOLLIN or EPOLLOUT first\n  // and then returned after handling them, we would keep doing so forever and\n  // never reach the error handling. 
So we should keep the error check first.\n  if (events & EPOLLERR) {\n    int error;\n    socklen_t errorlen = sizeof(error);\n    int rv = getsockopt(\n        socket_.fd(),\n        SOL_SOCKET,\n        SO_ERROR,\n        reinterpret_cast<void*>(&error),\n        &errorlen);\n    if (rv == -1) {\n      setError(TP_CREATE_ERROR(SystemError, \"getsockopt\", rv));\n    } else {\n      setError(TP_CREATE_ERROR(SystemError, \"async error on socket\", error));\n    }\n    return;\n  }\n  if (events & EPOLLIN) {\n    handleEventInFromLoop();\n    return;\n  }\n  if (events & EPOLLOUT) {\n    handleEventOutFromLoop();\n    return;\n  }\n  // Check for hangup last, as there could be cases where we get EPOLLHUP but\n  // there's still data to be read from the socket, so we want to deal with that\n  // before dealing with the hangup.\n  if (events & EPOLLHUP) {\n    setError(TP_CREATE_ERROR(EOFError));\n    return;\n  }\n}\n\nvoid ConnectionImpl::handleEventInFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  if (state_ == RECV_FDS) {\n    Fd reactorHeaderFd;\n    Fd reactorDataFd;\n    Fd outboxHeaderFd;\n    Fd outboxDataFd;\n    Reactor::TToken peerInboxReactorToken;\n    Reactor::TToken peerOutboxReactorToken;\n\n    // Receive the reactor token, reactor fds, and inbox fds.\n    auto err = socket_.recvPayloadAndFds(\n        peerInboxReactorToken,\n        peerOutboxReactorToken,\n        reactorHeaderFd,\n        reactorDataFd,\n        outboxHeaderFd,\n        outboxDataFd);\n    if (err) {\n      setError(std::move(err));\n      return;\n    }\n\n    // Load ringbuffer for outbox.\n    std::tie(err, outboxHeaderSegment_, outboxDataSegment_, outboxRb_) =\n        loadShmRingBuffer<kNumRingbufferRoles>(\n            std::move(outboxHeaderFd), std::move(outboxDataFd));\n    TP_THROW_ASSERT_IF(err)\n        << \"Couldn't access ringbuffer of connection outbox: \" << err.what();\n\n    // Initialize remote reactor trigger.\n    peerReactorTrigger_.emplace(\n        
std::move(reactorHeaderFd), std::move(reactorDataFd));\n\n    peerInboxReactorToken_ = peerInboxReactorToken;\n    peerOutboxReactorToken_ = peerOutboxReactorToken;\n\n    // The connection is usable now.\n    state_ = ESTABLISHED;\n    processWriteOperationsFromLoop();\n    // Trigger read operations in case a pair of local read() and remote\n    // write() happened before connection is established. Otherwise read()\n    // callback would lose if it's the only read() request.\n    processReadOperationsFromLoop();\n    return;\n  }\n\n  if (state_ == ESTABLISHED) {\n    // We don't expect to read anything on this socket once the\n    // connection has been established. If we do, assume it's a\n    // zero-byte read indicating EOF.\n    setError(TP_CREATE_ERROR(EOFError));\n    return;\n  }\n\n  TP_THROW_ASSERT() << \"EPOLLIN event not handled in state \" << state_;\n}\n\nvoid ConnectionImpl::handleEventOutFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  if (state_ == SEND_FDS) {\n    int reactorHeaderFd;\n    int reactorDataFd;\n    std::tie(reactorHeaderFd, reactorDataFd) = context_->reactorFds();\n\n    // Send our reactor token, reactor fds, and inbox fds.\n    auto err = socket_.sendPayloadAndFds(\n        inboxReactorToken_.value(),\n        outboxReactorToken_.value(),\n        reactorHeaderFd,\n        reactorDataFd,\n        inboxHeaderSegment_.getFd(),\n        inboxDataSegment_.getFd());\n    if (err) {\n      setError(std::move(err));\n      return;\n    }\n\n    // Sent our fds. 
Wait for fds from peer.\n    state_ = RECV_FDS;\n    context_->registerDescriptor(socket_.fd(), EPOLLIN, shared_from_this());\n    return;\n  }\n\n  TP_THROW_ASSERT() << \"EPOLLOUT event not handled in state \" << state_;\n}\n\nvoid ConnectionImpl::processReadOperationsFromLoop() {\n  TP_DCHECK(context_->inLoop());\n\n  // Process all read operations that we can immediately serve, only\n  // when connection is established.\n  if (state_ != ESTABLISHED) {\n    return;\n  }\n  // Serve read operations\n  Consumer inboxConsumer(inboxRb_);\n  while (!readOperations_.empty()) {\n    RingbufferReadOperation& readOperation = readOperations_.front();\n    if (readOperation.handleRead(inboxConsumer) > 0) {\n      peerReactorTrigger_->run(peerOutboxReactorToken_.value());\n    }\n    if (readOperation.completed()) {\n      readOperations_.pop_front();\n    } else {\n      break;\n    }\n  }\n}\n\nvoid ConnectionImpl::processWriteOperationsFromLoop() {\n  TP_DCHECK(context_->inLoop());\n\n  if (state_ != ESTABLISHED) {\n    return;\n  }\n\n  Producer outboxProducer(outboxRb_);\n  while (!writeOperations_.empty()) {\n    RingbufferWriteOperation& writeOperation = writeOperations_.front();\n    if (writeOperation.handleWrite(outboxProducer) > 0) {\n      peerReactorTrigger_->run(peerInboxReactorToken_.value());\n    }\n    if (writeOperation.completed()) {\n      writeOperations_.pop_front();\n    } else {\n      break;\n    }\n  }\n}\n\nvoid ConnectionImpl::handleErrorImpl() {\n  for (auto& readOperation : readOperations_) {\n    readOperation.handleError(error_);\n  }\n  readOperations_.clear();\n  for (auto& writeOperation : writeOperations_) {\n    writeOperation.handleError(error_);\n  }\n  writeOperations_.clear();\n  if (inboxReactorToken_.has_value()) {\n    context_->removeReaction(inboxReactorToken_.value());\n    inboxReactorToken_.reset();\n  }\n  if (outboxReactorToken_.has_value()) {\n    context_->removeReaction(outboxReactorToken_.value());\n    
outboxReactorToken_.reset();\n  }\n  if (socket_.hasValue()) {\n    if (state_ > INITIALIZING) {\n      context_->unregisterDescriptor(socket_.fd());\n    }\n    socket_.reset();\n  }\n\n  context_->unenroll(*this);\n}\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/connection_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <deque>\n#include <memory>\n#include <string>\n\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/nop.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/ringbuffer.h>\n#include <tensorpipe/common/ringbuffer_read_write_ops.h>\n#include <tensorpipe/common/shm_segment.h>\n#include <tensorpipe/common/socket.h>\n#include <tensorpipe/transport/connection_impl_boilerplate.h>\n#include <tensorpipe/transport/shm/reactor.h>\n#include <tensorpipe/transport/shm/sockaddr.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nclass ContextImpl;\nclass ListenerImpl;\n\nclass ConnectionImpl final : public ConnectionImplBoilerplate<\n                                 ContextImpl,\n                                 ListenerImpl,\n                                 ConnectionImpl>,\n                             public EpollLoop::EventHandler {\n  constexpr static size_t kBufferSize = 2 * 1024 * 1024;\n\n  constexpr static int kNumRingbufferRoles = 2;\n  using Consumer = RingBufferRole<kNumRingbufferRoles, 0>;\n  using Producer = RingBufferRole<kNumRingbufferRoles, 1>;\n\n  enum State {\n    INITIALIZING = 1,\n    SEND_FDS,\n    RECV_FDS,\n    ESTABLISHED,\n  };\n\n public:\n  // Create a connection that is already connected (e.g. 
from a listener).\n  ConnectionImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      Socket socket);\n\n  // Create a connection that connects to the specified address.\n  ConnectionImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::string addr);\n\n  // Implementation of EventHandler.\n  void handleEventsFromLoop(int events) override;\n\n protected:\n  // Implement the entry points called by ConnectionImplBoilerplate.\n  void initImplFromLoop() override;\n  void readImplFromLoop(read_callback_fn fn) override;\n  void readImplFromLoop(AbstractNopHolder& object, read_nop_callback_fn fn)\n      override;\n  void readImplFromLoop(void* ptr, size_t length, read_callback_fn fn) override;\n  void writeImplFromLoop(const void* ptr, size_t length, write_callback_fn fn)\n      override;\n  void writeImplFromLoop(const AbstractNopHolder& object, write_callback_fn fn)\n      override;\n  void handleErrorImpl() override;\n\n private:\n  // Handle events of type EPOLLIN on the UNIX domain socket.\n  //\n  // The only data that is expected on that socket is the file descriptors for\n  // the other side's inbox (which is this side's outbox) and its reactor, plus\n  // the reactor tokens to trigger the other side to read or write.\n  void handleEventInFromLoop();\n\n  // Handle events of type EPOLLOUT on the UNIX domain socket.\n  //\n  // Once the socket is writable we send the file descriptors for this side's\n  // inbox (which is the other side's outbox) and our reactor, plus the reactor\n  // tokens to trigger this connection to read or write.\n  void handleEventOutFromLoop();\n\n  State state_{INITIALIZING};\n  Socket socket_;\n  optional<Sockaddr> sockaddr_;\n\n  // Inbox.\n  ShmSegment inboxHeaderSegment_;\n  ShmSegment inboxDataSegment_;\n  RingBuffer<kNumRingbufferRoles> inboxRb_;\n  optional<Reactor::TToken> inboxReactorToken_;\n\n  // Outbox.\n  
ShmSegment outboxHeaderSegment_;\n  ShmSegment outboxDataSegment_;\n  RingBuffer<kNumRingbufferRoles> outboxRb_;\n  optional<Reactor::TToken> outboxReactorToken_;\n\n  // Peer trigger/tokens.\n  optional<Reactor::Trigger> peerReactorTrigger_;\n  optional<Reactor::TToken> peerInboxReactorToken_;\n  optional<Reactor::TToken> peerOutboxReactorToken_;\n\n  // Pending read operations.\n  std::deque<RingbufferReadOperation> readOperations_;\n\n  // Pending write operations.\n  std::deque<RingbufferWriteOperation> writeOperations_;\n\n  // Process pending read operations if in an operational state.\n  //\n  // This may be triggered by the other side of the connection (by pushing this\n  // side's inbox token to the reactor) when it has written some new data to its\n  // outbox (which is this side's inbox). It is also called by this connection\n  // when it moves into an established state or when a new read operation is\n  // queued, in case data was already available before this connection was ready\n  // to consume it.\n  void processReadOperationsFromLoop();\n\n  // Process pending write operations if in an operational state.\n  //\n  // This may be triggered by the other side of the connection (by pushing this\n  // side's outbox token to the reactor) when it has read some data from its\n  // inbox (which is this side's outbox). This is important when some of this\n  // side's writes couldn't complete because the outbox was full, and thus they\n  // needed to wait for some of its data to be read. This method is also called\n  // by this connection when it moves into an established state, in case some\n  // writes were queued before the connection was ready to process them, or when\n  // a new write operation is queued.\n  void processWriteOperationsFromLoop();\n};\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/shm/context_impl.h>\n\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/system.h>\n#include <tensorpipe/transport/shm/connection_impl.h>\n#include <tensorpipe/transport/shm/listener_impl.h>\n#include <tensorpipe/transport/shm/reactor.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nnamespace {\n\n// Prepend descriptor with transport name so it's easy to\n// disambiguate descriptors when debugging.\nconst std::string kDomainDescriptorPrefix{\"shm:\"};\n\n} // namespace\n\nstd::shared_ptr<ContextImpl> ContextImpl::create() {\n  std::ostringstream oss;\n  oss << kDomainDescriptorPrefix;\n\n  // This transport only works across processes on the same machine, and we\n  // detect that by computing the boot ID.\n  auto bootID = getBootID();\n  TP_THROW_ASSERT_IF(!bootID.has_value()) << \"Unable to read boot_id\";\n  oss << bootID.value();\n\n  // This transport bootstraps a connection by opening a UNIX domain socket, for\n  // which it uses an \"abstract\" address (i.e., just an identifier, which is not\n  // materialized to a filesystem path). In order for the two endpoints to\n  // access each other's address they must be in the same Linux kernel network\n  // namespace (see network_namespaces(7)).\n  auto nsID = getLinuxNamespaceId(LinuxNamespace::kNet);\n  if (!nsID.has_value()) {\n    TP_VLOG(8) << \"Unable to read net namespace ID\";\n    return nullptr;\n  }\n  oss << '_' << nsID.value();\n\n  // Over that UNIX domain socket, the two endpoints exchange file descriptors\n  // to regions of shared memory. 
Some restrictions may be in place that prevent\n  // allocating such regions, hence let's allocate one here to see if it works.\n  Error error;\n  ShmSegment segment;\n  std::tie(error, segment) = ShmSegment::alloc(1024 * 1024);\n  if (error) {\n    TP_VLOG(8) << \"Couldn't allocate shared memory segment: \" << error.what();\n    return nullptr;\n  }\n\n  // A separate problem is that /dev/shm may be sized too small for all the\n  // memory we need to allocate. However, our memory usage is unbounded, as it\n  // grows as we open more connections, hence we cannot check it in advance.\n\n  std::string domainDescriptor = oss.str();\n  TP_VLOG(8) << \"The domain descriptor for SHM is \" << domainDescriptor;\n  return std::make_shared<ContextImpl>(std::move(domainDescriptor));\n}\n\nContextImpl::ContextImpl(std::string domainDescriptor)\n    : ContextImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          std::move(domainDescriptor)) {}\n\nvoid ContextImpl::handleErrorImpl() {\n  loop_.close();\n  reactor_.close();\n}\n\nvoid ContextImpl::joinImpl() {\n  loop_.join();\n  reactor_.join();\n}\n\nbool ContextImpl::inLoop() const {\n  return reactor_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  reactor_.deferToLoop(std::move(fn));\n};\n\nvoid ContextImpl::registerDescriptor(\n    int fd,\n    int events,\n    std::shared_ptr<EpollLoop::EventHandler> h) {\n  loop_.registerDescriptor(fd, events, std::move(h));\n}\n\nvoid ContextImpl::unregisterDescriptor(int fd) {\n  loop_.unregisterDescriptor(fd);\n}\n\nContextImpl::TToken ContextImpl::addReaction(TFunction fn) {\n  return reactor_.add(std::move(fn));\n}\n\nvoid ContextImpl::removeReaction(TToken token) {\n  reactor_.remove(token);\n}\n\nstd::tuple<int, int> ContextImpl::reactorFds() {\n  return reactor_.fds();\n}\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <tuple>\n\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/transport/context_impl_boilerplate.h>\n#include <tensorpipe/transport/shm/reactor.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nclass ConnectionImpl;\nclass ListenerImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create();\n\n  explicit ContextImpl(std::string domainDescriptor);\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n  void registerDescriptor(\n      int fd,\n      int events,\n      std::shared_ptr<EpollLoop::EventHandler> h);\n\n  void unregisterDescriptor(int fd);\n\n  using TToken = uint32_t;\n  using TFunction = std::function<void()>;\n\n  TToken addReaction(TFunction fn);\n\n  void removeReaction(TToken token);\n\n  std::tuple<int, int> reactorFds();\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n\n private:\n  Reactor reactor_;\n  EpollLoop loop_{this->reactor_};\n};\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/shm/factory.h>\n\n#include <tensorpipe/transport/context_boilerplate.h>\n#include <tensorpipe/transport/shm/connection_impl.h>\n#include <tensorpipe/transport/shm/context_impl.h>\n#include <tensorpipe/transport/shm/listener_impl.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nstd::shared_ptr<Context> create() {\n  return std::make_shared<\n      ContextBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>>();\n}\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nstd::shared_ptr<Context> create();\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/listener_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/shm/listener_impl.h>\n\n#include <deque>\n#include <functional>\n#include <mutex>\n#include <vector>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/transport/error.h>\n#include <tensorpipe/transport/shm/connection_impl.h>\n#include <tensorpipe/transport/shm/context_impl.h>\n#include <tensorpipe/transport/shm/sockaddr.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nListenerImpl::ListenerImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::string addr)\n    : ListenerImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      sockaddr_(Sockaddr::createAbstractUnixAddr(addr)) {}\n\nvoid ListenerImpl::initImplFromLoop() {\n  context_->enroll(*this);\n\n  Error error;\n  TP_DCHECK(!socket_.hasValue());\n  std::tie(error, socket_) = Socket::createForFamily(AF_UNIX);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  error = socket_.bind(sockaddr_);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  error = socket_.block(false);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  error = socket_.listen(128);\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  struct sockaddr_storage addr;\n  socklen_t addrlen;\n  std::tie(error, addr, addrlen) = socket_.getSockName();\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n  sockaddr_ = Sockaddr(reinterpret_cast<struct sockaddr*>(&addr), addrlen);\n}\n\nvoid 
ListenerImpl::handleErrorImpl() {\n  if (!fns_.empty()) {\n    context_->unregisterDescriptor(socket_.fd());\n  }\n  socket_.reset();\n  for (auto& fn : fns_) {\n    fn(error_, std::shared_ptr<Connection>());\n  }\n  fns_.clear();\n\n  context_->unenroll(*this);\n}\n\nvoid ListenerImpl::acceptImplFromLoop(accept_callback_fn fn) {\n  fns_.push_back(std::move(fn));\n\n  // Only register if we go from 0 to 1 pending callbacks. In other cases we\n  // already had a pending callback and thus we were already registered.\n  if (fns_.size() == 1) {\n    // Register with loop for readability events.\n    context_->registerDescriptor(socket_.fd(), EPOLLIN, shared_from_this());\n  }\n}\n\nstd::string ListenerImpl::addrImplFromLoop() const {\n  return sockaddr_.str();\n}\n\nvoid ListenerImpl::handleEventsFromLoop(int events) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Listener \" << id_ << \" is handling an event on its socket (\"\n             << EpollLoop::formatEpollEvents(events) << \")\";\n\n  if (events & EPOLLERR) {\n    int error;\n    socklen_t errorlen = sizeof(error);\n    int rv = getsockopt(\n        socket_.fd(),\n        SOL_SOCKET,\n        SO_ERROR,\n        reinterpret_cast<void*>(&error),\n        &errorlen);\n    if (rv == -1) {\n      setError(TP_CREATE_ERROR(SystemError, \"getsockopt\", rv));\n    } else {\n      setError(TP_CREATE_ERROR(SystemError, \"async error on socket\", error));\n    }\n    return;\n  }\n  if (events & EPOLLHUP) {\n    setError(TP_CREATE_ERROR(EOFError));\n    return;\n  }\n  TP_ARG_CHECK_EQ(events, EPOLLIN);\n\n  Error error;\n  Socket socket;\n  std::tie(error, socket) = socket_.accept();\n  if (error) {\n    setError(std::move(error));\n    return;\n  }\n\n  TP_DCHECK(!fns_.empty())\n      << \"when the callback is disarmed the listener's descriptor is supposed \"\n      << \"to be unregistered\";\n  auto fn = std::move(fns_.front());\n  fns_.pop_front();\n  if (fns_.empty()) {\n    
context_->unregisterDescriptor(socket_.fd());\n  }\n  fn(Error::kSuccess, createAndInitConnection(std::move(socket)));\n}\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/listener_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <deque>\n#include <memory>\n#include <string>\n\n#include <tensorpipe/common/epoll_loop.h>\n#include <tensorpipe/common/socket.h>\n#include <tensorpipe/transport/listener_impl_boilerplate.h>\n#include <tensorpipe/transport/shm/sockaddr.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nclass ConnectionImpl;\nclass ContextImpl;\n\nclass ListenerImpl final\n    : public ListenerImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>,\n      public EpollLoop::EventHandler {\n public:\n  // Create a listener that listens on the specified address.\n  ListenerImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::string addr);\n\n  // Implementation of EventHandler.\n  void handleEventsFromLoop(int events) override;\n\n protected:\n  // Implement the entry points called by ListenerImplBoilerplate.\n  void initImplFromLoop() override;\n  void acceptImplFromLoop(accept_callback_fn fn) override;\n  std::string addrImplFromLoop() const override;\n  void handleErrorImpl() override;\n\n private:\n  Socket socket_;\n  Sockaddr sockaddr_;\n  std::deque<accept_callback_fn> fns_;\n};\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/reactor.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/shm/reactor.h>\n\n#include <tensorpipe/common/shm_ringbuffer.h>\n#include <tensorpipe/common/system.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nnamespace {\n\nvoid writeToken(Reactor::Producer& producer, Reactor::TToken token) {\n  for (;;) {\n    auto rv = producer.write(&token, sizeof(token));\n    if (rv == -EAGAIN) {\n      // There's contention on the spin-lock, wait for it by retrying.\n      std::this_thread::yield();\n      continue;\n    }\n    if (rv == -ENODATA) {\n      // The ringbuffer is full. Retrying should typically work, but might lead\n      // to a deadlock if, for example, a reactor thread is trying to write a\n      // token to its own ringbuffer, as then it would be stuck here and never\n      // proceed to consume data from the ringbuffer. This could also happen\n      // across multiple processes. 
This case seems remote enough, and a proper\n      // solution rather complicated, that we're going to take that risk...\n      std::this_thread::yield();\n      continue;\n    }\n    TP_DCHECK_EQ(rv, sizeof(token));\n    break;\n  }\n}\n\n} // namespace\n\nReactor::Reactor() {\n  Error error;\n  std::tie(error, headerSegment_, dataSegment_, rb_) =\n      createShmRingBuffer<kNumRingbufferRoles>(kSize);\n  TP_THROW_ASSERT_IF(error)\n      << \"Couldn't allocate ringbuffer for reactor: \" << error.what();\n\n  startThread(\"TP_SHM_reactor\");\n}\n\nvoid Reactor::close() {\n  if (!closed_.exchange(true)) {\n    stopBusyPolling();\n  }\n}\n\nvoid Reactor::join() {\n  close();\n\n  if (!joined_.exchange(true)) {\n    joinThread();\n  }\n}\n\nReactor::~Reactor() {\n  join();\n}\n\nReactor::TToken Reactor::add(TFunction fn) {\n  std::unique_lock<std::mutex> lock(mutex_);\n  TToken token;\n\n  // Either reuse a token or generate a new one.\n  auto it = reusableTokens_.begin();\n  if (it != reusableTokens_.end()) {\n    token = *it;\n    reusableTokens_.erase(it);\n  } else {\n    // If there are no reusable tokens, the next token is always equal\n    // to the number of tokens in use + 1.\n    token = functions_.size();\n  }\n\n  // Ensure there is enough space in the functions vector.\n  if (functions_.size() <= token) {\n    functions_.resize(token + 1);\n  }\n\n  functions_[token] = std::move(fn);\n\n  functionCount_++;\n\n  return token;\n}\n\nvoid Reactor::remove(TToken token) {\n  std::unique_lock<std::mutex> lock(mutex_);\n  functions_[token] = nullptr;\n  reusableTokens_.insert(token);\n  functionCount_--;\n}\n\nstd::tuple<int, int> Reactor::fds() const {\n  return std::make_tuple(headerSegment_.getFd(), dataSegment_.getFd());\n}\n\nbool Reactor::pollOnce() {\n  Consumer reactorConsumer(rb_);\n  uint32_t token;\n  auto ret = reactorConsumer.read(&token, sizeof(token));\n  if (ret == -ENODATA) {\n    return false;\n  }\n  TP_THROW_SYSTEM_IF(ret < 0, -ret);\n\n  
TFunction fn;\n\n  // Make copy of std::function so we don't need\n  // to hold the lock while executing it.\n  {\n    std::unique_lock<std::mutex> lock(mutex_);\n    TP_DCHECK_LT(token, functions_.size());\n    fn = functions_[token];\n  }\n\n  if (fn) {\n    fn();\n  }\n\n  return true;\n}\n\nbool Reactor::readyToClose() {\n  return functionCount_ == 0;\n}\n\nReactor::Trigger::Trigger(Fd headerFd, Fd dataFd) {\n  // The header and data segment objects take over ownership\n  // of file descriptors. Release them to avoid double close.\n  Error error;\n  std::tie(error, headerSegment_, dataSegment_, rb_) =\n      loadShmRingBuffer<kNumRingbufferRoles>(\n          std::move(headerFd), std::move(dataFd));\n  TP_THROW_ASSERT_IF(error)\n      << \"Couldn't access ringbuffer of remote reactor: \" << error.what();\n}\n\nvoid Reactor::Trigger::run(TToken token) {\n  Producer producer(rb_);\n  writeToken(producer, token);\n}\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/reactor.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <atomic>\n#include <functional>\n#include <future>\n#include <list>\n#include <mutex>\n#include <set>\n#include <thread>\n#include <vector>\n\n#include <tensorpipe/common/busy_polling_loop.h>\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/fd.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/ringbuffer_role.h>\n#include <tensorpipe/common/shm_segment.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\n// Reactor loop.\n//\n// Companion class to the event loop in `loop.h` that executes\n// functions on triggers. The triggers are posted to a shared memory\n// ring buffer, so this can be done by other processes on the same\n// machine. It uses extra data in the ring buffer header to store a\n// mutex and condition variable to avoid a busy loop.\n//\nclass Reactor final : public BusyPollingLoop {\n  // This allows for buffering 1M triggers (at 4 bytes a piece).\n  static constexpr auto kSize = 4 * 1024 * 1024;\n\n  static constexpr int kNumRingbufferRoles = 2;\n\n public:\n  using TFunction = std::function<void()>;\n  using TToken = uint32_t;\n  using Consumer = RingBufferRole<kNumRingbufferRoles, 0>;\n  using Producer = RingBufferRole<kNumRingbufferRoles, 1>;\n\n  Reactor();\n\n  // Add function to the reactor.\n  // Returns token that can be used to trigger it.\n  TToken add(TFunction fn);\n\n  // Removes function associated with token from reactor.\n  void remove(TToken token);\n\n  // Returns the file descriptors for the underlying ring buffer.\n  std::tuple<int, int> fds() const;\n\n  void close();\n\n  void join();\n\n  ~Reactor();\n\n protected:\n  bool pollOnce() override;\n\n  bool readyToClose() override;\n\n private:\n  
ShmSegment headerSegment_;\n  ShmSegment dataSegment_;\n  RingBuffer<kNumRingbufferRoles> rb_;\n\n  std::mutex mutex_;\n  std::atomic<bool> closed_{false};\n  std::atomic<bool> joined_{false};\n\n  // Tokens are placed in this set if they can be reused.\n  std::set<TToken> reusableTokens_;\n\n  // Map reactor tokens to functions.\n  //\n  // The tokens are reused so we don't worry about unbounded growth\n  // and comfortably use a std::vector here.\n  //\n  std::vector<TFunction> functions_;\n\n  // Count how many functions are registered.\n  std::atomic<uint64_t> functionCount_{0};\n\n public:\n  class Trigger {\n   public:\n    Trigger(Fd header, Fd data);\n\n    void run(TToken token);\n\n   private:\n    ShmSegment headerSegment_;\n    ShmSegment dataSegment_;\n    RingBuffer<kNumRingbufferRoles> rb_;\n  };\n};\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/sockaddr.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/shm/sockaddr.h>\n\n#include <fcntl.h>\n#include <sys/un.h>\n#include <unistd.h>\n\n#include <cstring>\n\n#include <tensorpipe/common/defs.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nSockaddr Sockaddr::createAbstractUnixAddr(const std::string& name) {\n  struct sockaddr_un sun;\n  sun.sun_family = AF_UNIX;\n  std::memset(&sun.sun_path, 0, sizeof(sun.sun_path));\n  // There are three \"modes\" for binding UNIX domain sockets:\n  // - if len(path) == 0: it autobinds to an abstract address\n  // - if len(path) > 0 and path[0] == 0: it uses an explicit abstract address\n  // - if len(path) > 0 and path[0] != 0: it uses a concrete filesystem path\n  if (name == \"\") {\n    return Sockaddr(\n        reinterpret_cast<struct sockaddr*>(&sun), sizeof(sun.sun_family));\n  } else {\n    constexpr size_t offset = 1;\n    const size_t len = std::min(sizeof(sun.sun_path) - offset, name.size());\n    std::strncpy(&sun.sun_path[offset], name.data(), len);\n\n    // Note: instead of using sizeof(sun) we compute the addrlen from\n    // the string length of the abstract socket name. 
If we use\n    // sizeof(sun), lsof shows all the trailing NUL characters.\n    return Sockaddr(\n        reinterpret_cast<struct sockaddr*>(&sun),\n        sizeof(sun.sun_family) + offset + len);\n  }\n};\n\nSockaddr::Sockaddr(const struct sockaddr* addr, socklen_t addrlen) {\n  TP_ARG_CHECK(addr != nullptr);\n  TP_ARG_CHECK_LE(addrlen, sizeof(addr_));\n  std::memset(&addr_, 0, sizeof(addr_));\n  std::memcpy(&addr_, addr, addrlen);\n  addrlen_ = addrlen;\n}\n\nstd::string Sockaddr::str() const {\n  TP_DCHECK_GE(addrlen_, sizeof(sockaddr_un::sun_family));\n  if (addrlen_ == sizeof(sockaddr_un::sun_family)) {\n    return \"\";\n  } else {\n    const struct sockaddr_un* sun{\n        reinterpret_cast<const struct sockaddr_un*>(&addr_)};\n    TP_DCHECK_EQ(sun->sun_path[0], '\\0');\n    constexpr size_t offset = 1;\n    const size_t len = addrlen_ - sizeof(sun->sun_family) - offset;\n    return std::string(&sun->sun_path[offset], len);\n  }\n}\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/shm/sockaddr.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sys/socket.h>\n\n#include <cstring>\n#include <string>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/socket.h>\n#include <tensorpipe/transport/error.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace shm {\n\nclass Sockaddr final : public tensorpipe::Sockaddr {\n public:\n  static Sockaddr createAbstractUnixAddr(const std::string& name);\n\n  inline const struct sockaddr* addr() const override {\n    return reinterpret_cast<const struct sockaddr*>(&addr_);\n  }\n\n  inline socklen_t addrlen() const override {\n    return addrlen_;\n  }\n\n  std::string str() const;\n\n  explicit Sockaddr(const struct sockaddr* addr, socklen_t addrlen);\n\n private:\n  struct sockaddr_storage addr_;\n  socklen_t addrlen_;\n};\n\n} // namespace shm\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/connection_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/connection_impl.h>\n\n#include <array>\n#include <deque>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/stream_read_write_ops.h>\n#include <tensorpipe/transport/uv/context_impl.h>\n#include <tensorpipe/transport/uv/error.h>\n#include <tensorpipe/transport/uv/loop.h>\n#include <tensorpipe/transport/uv/sockaddr.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nConnectionImpl::ConnectionImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::unique_ptr<TCPHandle> handle)\n    : ConnectionImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      handle_(std::move(handle)) {}\n\nConnectionImpl::ConnectionImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::string addr)\n    : ConnectionImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      handle_(context_->createHandle()),\n      sockaddr_(Sockaddr::createInetSockAddr(addr)) {}\n\nvoid ConnectionImpl::initImplFromLoop() {\n  context_->enroll(*this);\n\n  TP_VLOG(9) << \"Connection \" << id_ << \" is initializing in loop\";\n\n  if (sockaddr_.has_value()) {\n    TP_THROW_ASSERT_IF(context_->closed());\n    handle_->initFromLoop();\n    handle_->connectFromLoop(sockaddr_.value(), [this](int status) {\n      if (status < 0) {\n        
setError(TP_CREATE_ERROR(UVError, status));\n      }\n    });\n  }\n  handle_->armCloseCallbackFromLoop(\n      [this]() { this->closeCallbackFromLoop(); });\n  handle_->armAllocCallbackFromLoop(\n      [this](uv_buf_t* buf) { this->allocCallbackFromLoop(buf); });\n  handle_->armReadCallbackFromLoop([this](ssize_t nread, const uv_buf_t* buf) {\n    this->readCallbackFromLoop(nread, buf);\n  });\n}\n\nvoid ConnectionImpl::readImplFromLoop(read_callback_fn fn) {\n  readOperations_.emplace_back(std::move(fn));\n\n  // Start reading if this is the first read operation.\n  if (readOperations_.size() == 1) {\n    handle_->readStartFromLoop();\n  }\n}\n\nvoid ConnectionImpl::readImplFromLoop(\n    void* ptr,\n    size_t length,\n    read_callback_fn fn) {\n  readOperations_.emplace_back(ptr, length, std::move(fn));\n\n  // Start reading if this is the first read operation.\n  if (readOperations_.size() == 1) {\n    handle_->readStartFromLoop();\n  }\n}\n\nvoid ConnectionImpl::writeImplFromLoop(\n    const void* ptr,\n    size_t length,\n    write_callback_fn fn) {\n  writeOperations_.emplace_back(ptr, length, std::move(fn));\n\n  auto& writeOperation = writeOperations_.back();\n  StreamWriteOperation::Buf* bufsPtr;\n  unsigned int bufsLen;\n  std::tie(bufsPtr, bufsLen) = writeOperation.getBufs();\n  const std::array<uv_buf_t, 2> uvBufs = {\n      uv_buf_t{bufsPtr[0].base, bufsPtr[0].len},\n      uv_buf_t{bufsPtr[1].base, bufsPtr[1].len}};\n  handle_->writeFromLoop(uvBufs.data(), bufsLen, [this](int status) {\n    this->writeCallbackFromLoop(status);\n  });\n}\n\nvoid ConnectionImpl::allocCallbackFromLoop(uv_buf_t* buf) {\n  TP_DCHECK(context_->inLoop());\n  TP_THROW_ASSERT_IF(readOperations_.empty());\n  TP_VLOG(9) << \"Connection \" << id_\n             << \" has incoming data for which it needs to provide a buffer\";\n  readOperations_.front().allocFromLoop(&buf->base, &buf->len);\n}\n\nvoid ConnectionImpl::readCallbackFromLoop(\n    ssize_t nread,\n    const uv_buf_t* 
/* unused */) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_ << \" has completed reading some data (\"\n             << (nread >= 0 ? std::to_string(nread) + \" bytes\"\n                            : formatUvError(nread))\n             << \")\";\n\n  if (nread < 0) {\n    setError(TP_CREATE_ERROR(UVError, nread));\n    return;\n  }\n\n  TP_THROW_ASSERT_IF(readOperations_.empty());\n  auto& readOperation = readOperations_.front();\n  readOperation.readFromLoop(nread);\n  if (readOperation.completeFromLoop()) {\n    readOperation.callbackFromLoop(Error::kSuccess);\n    // Remove the completed operation.\n    // If this was the final pending operation, this instance should\n    // no longer receive allocation and read callbacks.\n    readOperations_.pop_front();\n    if (readOperations_.empty()) {\n      handle_->readStopFromLoop();\n    }\n  }\n}\n\nvoid ConnectionImpl::writeCallbackFromLoop(int status) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_ << \" has completed a write request (\"\n             << formatUvError(status) << \")\";\n\n  if (status < 0) {\n    setError(TP_CREATE_ERROR(UVError, status));\n    // Do NOT return, because the error handler method will only fire the\n    // callbacks of the read operations, because we can only fire the callbacks\n    // of the write operations after their corresponding UV requests complete\n    // (or else the user may deallocate the buffers while the loop is still\n    // processing them), therefore we must fire the write operation callbacks in\n    // this method, both in case of success and of error.\n  }\n\n  TP_THROW_ASSERT_IF(writeOperations_.empty());\n  auto& writeOperation = writeOperations_.front();\n  writeOperation.callbackFromLoop(error_);\n  writeOperations_.pop_front();\n}\n\nvoid ConnectionImpl::closeCallbackFromLoop() {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Connection \" << id_ << \" has finished closing its handle\";\n  
TP_DCHECK(writeOperations_.empty());\n  context_->unenroll(*this);\n}\n\nvoid ConnectionImpl::handleErrorImpl() {\n  for (auto& readOperation : readOperations_) {\n    readOperation.callbackFromLoop(error_);\n  }\n  readOperations_.clear();\n  // Do NOT fire the callbacks of the write operations, because we must wait for\n  // their corresponding UV write requests to complete (or else the user may\n  // deallocate the buffers while the loop is still processing them).\n  handle_->closeFromLoop();\n  // Do NOT unenroll here, as we must keep the UV handle alive until the close\n  // callback fires.\n}\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/connection_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <cstddef>\n#include <deque>\n#include <memory>\n#include <string>\n\n#include <tensorpipe/common/optional.h>\n#include <tensorpipe/common/stream_read_write_ops.h>\n#include <tensorpipe/transport/connection_impl_boilerplate.h>\n#include <tensorpipe/transport/uv/sockaddr.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nclass ContextImpl;\nclass ListenerImpl;\n\nclass ConnectionImpl final : public ConnectionImplBoilerplate<\n                                 ContextImpl,\n                                 ListenerImpl,\n                                 ConnectionImpl> {\n public:\n  // Create a connection that is already connected (e.g. from a listener).\n  ConnectionImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::unique_ptr<TCPHandle> handle);\n\n  // Create a connection that connects to the specified address.\n  ConnectionImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::string addr);\n\n protected:\n  // Implement the entry points called by ConnectionImplBoilerplate.\n  void initImplFromLoop() override;\n  void readImplFromLoop(read_callback_fn fn) override;\n  void readImplFromLoop(void* ptr, size_t length, read_callback_fn fn) override;\n  void writeImplFromLoop(const void* ptr, size_t length, write_callback_fn fn)\n      override;\n  void handleErrorImpl() override;\n\n private:\n  // Called when libuv is about to read data from connection.\n  void allocCallbackFromLoop(uv_buf_t* buf);\n\n  // Called when libuv has read data from connection.\n  void readCallbackFromLoop(ssize_t nread, const uv_buf_t* 
buf);\n\n  // Called when libuv has written data to connection.\n  void writeCallbackFromLoop(int status);\n\n  // Called when libuv has closed the handle.\n  void closeCallbackFromLoop();\n\n  const std::unique_ptr<TCPHandle> handle_;\n  optional<Sockaddr> sockaddr_;\n\n  std::deque<StreamReadOperation> readOperations_;\n  std::deque<StreamWriteOperation> writeOperations_;\n};\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/context_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/context_impl.h>\n\n#include <tensorpipe/transport/uv/connection_impl.h>\n#include <tensorpipe/transport/uv/listener_impl.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nnamespace {\n\n// Prepend descriptor with transport name so it's easy to\n// disambiguate descriptors when debugging.\nconst std::string kDomainDescriptorPrefix{\"uv:\"};\n\nstd::string generateDomainDescriptor() {\n  return kDomainDescriptorPrefix + \"*\";\n}\n\n} // namespace\n\nstd::shared_ptr<ContextImpl> ContextImpl::create() {\n  return std::make_shared<ContextImpl>();\n}\n\nContextImpl::ContextImpl()\n    : ContextImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          generateDomainDescriptor()) {}\n\nvoid ContextImpl::handleErrorImpl() {\n  loop_.close();\n}\n\nvoid ContextImpl::joinImpl() {\n  loop_.join();\n}\n\nbool ContextImpl::inLoop() const {\n  return loop_.inLoop();\n};\n\nvoid ContextImpl::deferToLoop(std::function<void()> fn) {\n  loop_.deferToLoop(std::move(fn));\n};\n\nstd::unique_ptr<TCPHandle> ContextImpl::createHandle() {\n  return std::make_unique<TCPHandle>(loop_.ptr(), loop_);\n};\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/context_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <memory>\n#include <string>\n#include <tuple>\n\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/transport/context_impl_boilerplate.h>\n#include <tensorpipe/transport/uv/loop.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nclass ConnectionImpl;\nclass ListenerImpl;\n\nclass ContextImpl final\n    : public ContextImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl> {\n public:\n  static std::shared_ptr<ContextImpl> create();\n\n  ContextImpl();\n\n  // Implement the DeferredExecutor interface.\n  bool inLoop() const override;\n  void deferToLoop(std::function<void()> fn) override;\n\n  std::unique_ptr<TCPHandle> createHandle();\n\n protected:\n  // Implement the entry points called by ContextImplBoilerplate.\n  void handleErrorImpl() override;\n  void joinImpl() override;\n\n private:\n  Loop loop_;\n};\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/error.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/error.h>\n\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nstd::string UVError::what() const {\n  return formatUvError(error_);\n}\n\nstd::string NoAddrFoundError::what() const {\n  return \"no address found\";\n}\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/error.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n\n#include <tensorpipe/transport/error.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nclass UVError final : public BaseError {\n public:\n  explicit UVError(int error) : error_(error) {}\n\n  std::string what() const override;\n\n private:\n  int error_;\n};\n\nclass NoAddrFoundError final : public BaseError {\n public:\n  NoAddrFoundError() {}\n\n  std::string what() const override;\n};\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/factory.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/factory.h>\n\n#include <tensorpipe/transport/context_boilerplate.h>\n#include <tensorpipe/transport/uv/connection_impl.h>\n#include <tensorpipe/transport/uv/context_impl.h>\n#include <tensorpipe/transport/uv/listener_impl.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nstd::shared_ptr<Context> create() {\n  return std::make_shared<\n      ContextBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>>();\n}\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/factory.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n\n#include <tensorpipe/transport/context.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nstd::shared_ptr<Context> create();\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/listener_impl.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/listener_impl.h>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/transport/uv/connection_impl.h>\n#include <tensorpipe/transport/uv/context_impl.h>\n#include <tensorpipe/transport/uv/error.h>\n#include <tensorpipe/transport/uv/loop.h>\n#include <tensorpipe/transport/uv/sockaddr.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nListenerImpl::ListenerImpl(\n    ConstructorToken token,\n    std::shared_ptr<ContextImpl> context,\n    std::string id,\n    std::string addr)\n    : ListenerImplBoilerplate<ContextImpl, ListenerImpl, ConnectionImpl>(\n          token,\n          std::move(context),\n          std::move(id)),\n      handle_(context_->createHandle()),\n      sockaddr_(Sockaddr::createInetSockAddr(addr)) {}\n\nvoid ListenerImpl::initImplFromLoop() {\n  context_->enroll(*this);\n\n  TP_VLOG(9) << \"Listener \" << id_ << \" is initializing in loop\";\n\n  TP_THROW_ASSERT_IF(context_->closed());\n  handle_->initFromLoop();\n  auto rv = handle_->bindFromLoop(sockaddr_);\n  TP_THROW_UV_IF(rv < 0, rv);\n  handle_->armCloseCallbackFromLoop(\n      [this]() { this->closeCallbackFromLoop(); });\n  handle_->listenFromLoop(\n      [this](int status) { this->connectionCallbackFromLoop(status); });\n\n  sockaddr_ = handle_->sockNameFromLoop();\n}\n\nvoid ListenerImpl::acceptImplFromLoop(accept_callback_fn fn) {\n  callback_.arm(std::move(fn));\n}\n\nstd::string ListenerImpl::addrImplFromLoop() const {\n  return sockaddr_.str();\n}\n\nvoid ListenerImpl::connectionCallbackFromLoop(int status) {\n  TP_DCHECK(context_->inLoop());\n  TP_VLOG(9) << \"Listener \" << id_\n             
<< \" has an incoming connection ready to be accepted (\"\n             << formatUvError(status) << \")\";\n\n  if (status != 0) {\n    setError(TP_CREATE_ERROR(UVError, status));\n    return;\n  }\n\n  auto connection = context_->createHandle();\n  TP_THROW_ASSERT_IF(context_->closed());\n  connection->initFromLoop();\n  handle_->acceptFromLoop(*connection);\n  callback_.trigger(\n      Error::kSuccess, createAndInitConnection(std::move(connection)));\n}\n\nvoid ListenerImpl::closeCallbackFromLoop() {\n  TP_VLOG(9) << \"Listener \" << id_ << \" has finished closing its handle\";\n  context_->unenroll(*this);\n}\n\nvoid ListenerImpl::handleErrorImpl() {\n  callback_.triggerAll([&]() {\n    return std::make_tuple(std::cref(error_), std::shared_ptr<Connection>());\n  });\n  handle_->closeFromLoop();\n  // Do NOT unenroll here, as we must keep the UV handle alive until the close\n  // callback fires.\n}\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/listener_impl.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <memory>\n#include <string>\n\n#include <tensorpipe/common/callback.h>\n#include <tensorpipe/transport/listener_impl_boilerplate.h>\n#include <tensorpipe/transport/uv/sockaddr.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nclass ConnectionImpl;\nclass ContextImpl;\n\nclass ListenerImpl final : public ListenerImplBoilerplate<\n                               ContextImpl,\n                               ListenerImpl,\n                               ConnectionImpl> {\n public:\n  // Create a listener that listens on the specified address.\n  ListenerImpl(\n      ConstructorToken token,\n      std::shared_ptr<ContextImpl> context,\n      std::string id,\n      std::string addr);\n\n protected:\n  // Implement the entry points called by ListenerImplBoilerplate.\n  void initImplFromLoop() override;\n  void acceptImplFromLoop(accept_callback_fn fn) override;\n  std::string addrImplFromLoop() const override;\n  void handleErrorImpl() override;\n\n private:\n  // Called by libuv if the listening socket can accept a new connection. Status\n  // is 0 in case of success, < 0 otherwise. See `uv_connection_cb` for more\n  // information.\n  void connectionCallbackFromLoop(int status);\n\n  // Called when libuv has closed the handle.\n  void closeCallbackFromLoop();\n\n  const std::unique_ptr<TCPHandle> handle_;\n  Sockaddr sockaddr_;\n\n  // Once an accept callback fires, it becomes disarmed and must be rearmed.\n  // Any firings that occur while the callback is disarmed are stashed and\n  // triggered as soon as it's rearmed. 
With libuv we don't have the ability\n  // to disable the lower-level callback when the user callback is disarmed.\n  // So we'll keep getting notified of new connections even if we don't know\n  // what to do with them and don't want them. Thus we must store them\n  // somewhere. This is what RearmableCallback is for.\n  RearmableCallback<const Error&, std::shared_ptr<Connection>> callback_;\n};\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/loop.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/loop.h>\n\n#include <tensorpipe/common/system.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nLoop::Loop() {\n  int rv;\n  rv = uv_loop_init(&loop_);\n  TP_THROW_UV_IF(rv < 0, rv);\n  rv = uv_async_init(&loop_, &async_, uvAsyncCb);\n  TP_THROW_UV_IF(rv < 0, rv);\n  async_.data = this;\n\n  startThread(\"TP_UV_loop\");\n}\n\nvoid Loop::close() {\n  if (!closed_.exchange(true)) {\n    // It's fine to capture this because the loop won't be destroyed until join\n    // has completed, and join won't complete until this operation is performed.\n    deferToLoop(\n        [this]() { uv_unref(reinterpret_cast<uv_handle_t*>(&async_)); });\n  }\n}\n\nvoid Loop::join() {\n  close();\n\n  if (!joined_.exchange(true)) {\n    joinThread();\n  }\n}\n\nLoop::~Loop() noexcept {\n  join();\n}\n\nvoid Loop::wakeupEventLoopToDeferFunction() {\n  auto rv = uv_async_send(&async_);\n  TP_THROW_UV_IF(rv < 0, rv);\n}\n\nvoid Loop::eventLoop() {\n  int rv;\n\n  rv = uv_run(&loop_, UV_RUN_DEFAULT);\n  TP_THROW_ASSERT_IF(rv > 0)\n      << \": uv_run returned with active handles or requests\";\n}\n\nvoid Loop::cleanUpLoop() {\n  int rv;\n\n  uv_ref(reinterpret_cast<uv_handle_t*>(&async_));\n  uv_close(reinterpret_cast<uv_handle_t*>(&async_), nullptr);\n\n  rv = uv_run(&loop_, UV_RUN_NOWAIT);\n  TP_THROW_ASSERT_IF(rv > 0)\n      << \": uv_run returned with active handles or requests\";\n\n  // Release resources associated with loop.\n  rv = uv_loop_close(&loop_);\n  TP_THROW_UV_IF(rv < 0, rv);\n}\n\nvoid Loop::uvAsyncCb(uv_async_t* handle) {\n  auto& loop = *reinterpret_cast<Loop*>(handle->data);\n  loop.runDeferredFunctionsFromEventLoop();\n}\n\n} // 
namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/loop.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <functional>\n#include <future>\n#include <memory>\n#include <mutex>\n#include <thread>\n#include <vector>\n\n#include <uv.h>\n\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nclass Loop final : public EventLoopDeferredExecutor {\n public:\n  Loop();\n\n  uv_loop_t* ptr() {\n    return &loop_;\n  }\n\n  bool closed() {\n    return closed_;\n  }\n\n  void close();\n\n  void join();\n\n  ~Loop() noexcept;\n\n protected:\n  // Event loop thread entry function.\n  void eventLoop() override;\n\n  // Clean up after event loop transitioned to on-demand.\n  void cleanUpLoop() override;\n\n  // Wake up the event loop.\n  void wakeupEventLoopToDeferFunction() override;\n\n private:\n  uv_loop_t loop_;\n  uv_async_t async_;\n  std::atomic<bool> closed_{false};\n  std::atomic<bool> joined_{false};\n\n  // This function is called by the event loop thread whenever\n  // we have to run a number of deferred functions.\n  static void uvAsyncCb(uv_async_t* handle);\n};\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/sockaddr.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/sockaddr.h>\n\n#include <array>\n#include <cstring>\n#include <sstream>\n#include <utility>\n\n#include <uv.h>\n\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nSockaddr Sockaddr::createInetSockAddr(const std::string& str) {\n  int port = 0;\n  std::string addrStr;\n  std::string portStr;\n\n  // If the input string is an IPv6 address with port, the address\n  // itself must be wrapped with brackets.\n  if (addrStr.empty()) {\n    auto start = str.find(\"[\");\n    auto stop = str.find(\"]\");\n    if (start < stop && start != std::string::npos &&\n        stop != std::string::npos) {\n      addrStr = str.substr(start + 1, stop - (start + 1));\n      if (stop + 1 < str.size() && str[stop + 1] == ':') {\n        portStr = str.substr(stop + 2);\n      }\n    }\n  }\n\n  // If the input string is an IPv4 address with port, we expect\n  // at least a single period and a single colon in the string.\n  if (addrStr.empty()) {\n    auto period = str.find(\".\");\n    auto colon = str.find(\":\");\n    if (period != std::string::npos && colon != std::string::npos) {\n      addrStr = str.substr(0, colon);\n      portStr = str.substr(colon + 1);\n    }\n  }\n\n  // Fallback to using entire input string as address without port.\n  if (addrStr.empty()) {\n    addrStr = str;\n  }\n\n  // Parse port number if specified.\n  if (!portStr.empty()) {\n    port = std::stoi(portStr);\n    if (port < 0 || port > std::numeric_limits<uint16_t>::max()) {\n      TP_THROW_EINVAL() << str;\n    }\n  }\n\n  // Try to convert an IPv4 address.\n  {\n    struct sockaddr_in addr;\n    auto rv = uv_ip4_addr(addrStr.c_str(), 
port, &addr);\n    if (rv == 0) {\n      return Sockaddr(reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr));\n    }\n  }\n\n  // Try to convert an IPv6 address.\n  {\n    struct sockaddr_in6 addr;\n    auto rv = uv_ip6_addr(addrStr.c_str(), port, &addr);\n    if (rv == 0) {\n      return Sockaddr(reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr));\n    }\n  }\n\n  // Invalid address.\n  TP_THROW_EINVAL() << str;\n\n  // Return bogus to silence \"return from non-void function\" warning.\n  // Note: we don't reach this point per the throw above.\n  return Sockaddr(nullptr, 0);\n}\n\nstd::string Sockaddr::str() const {\n  std::ostringstream oss;\n\n  if (addr_.ss_family == AF_INET) {\n    std::array<char, 64> buf;\n    auto in = reinterpret_cast<const struct sockaddr_in*>(&addr_);\n    auto rv = uv_ip4_name(in, buf.data(), buf.size());\n    TP_THROW_UV_IF(rv < 0, rv);\n    oss << buf.data() << \":\" << htons(in->sin_port);\n  } else if (addr_.ss_family == AF_INET6) {\n    std::array<char, 64> buf;\n    auto in6 = reinterpret_cast<const struct sockaddr_in6*>(&addr_);\n    auto rv = uv_ip6_name(in6, buf.data(), buf.size());\n    TP_THROW_UV_IF(rv < 0, rv);\n    oss << \"[\" << buf.data();\n    if (in6->sin6_scope_id > 0) {\n      std::array<char, UV_IF_NAMESIZE> scopeBuf;\n      size_t size = sizeof(scopeBuf);\n      rv = uv_if_indextoiid(in6->sin6_scope_id, scopeBuf.data(), &size);\n      TP_THROW_UV_IF(rv < 0, rv);\n      oss << \"%\" << scopeBuf.data();\n    }\n    oss << \"]:\" << htons(in6->sin6_port);\n  } else {\n    TP_THROW_EINVAL() << \"invalid address family: \" << addr_.ss_family;\n  }\n\n  return oss.str();\n}\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/sockaddr.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <sys/socket.h>\n\n#include <cstring>\n#include <string>\n\n#include <tensorpipe/common/socket.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nclass Sockaddr final : public tensorpipe::Sockaddr {\n public:\n  static Sockaddr createInetSockAddr(const std::string& str);\n\n  Sockaddr(const struct sockaddr* addr, socklen_t addrlen) {\n    TP_ARG_CHECK(addr != nullptr);\n    TP_ARG_CHECK_LE(addrlen, sizeof(addr_));\n    // Ensure the sockaddr_storage is zeroed, because we don't always\n    // write to all fields in the `sockaddr_[in|in6]` structures.\n    std::memset(&addr_, 0, sizeof(addr_));\n    std::memcpy(&addr_, addr, addrlen);\n    addrlen_ = addrlen;\n  }\n\n  inline const struct sockaddr* addr() const override {\n    return reinterpret_cast<const struct sockaddr*>(&addr_);\n  }\n\n  inline struct sockaddr* addr() {\n    return reinterpret_cast<struct sockaddr*>(&addr_);\n  }\n\n  inline socklen_t addrlen() const override {\n    return addrlen_;\n  }\n\n  std::string str() const;\n\n private:\n  struct sockaddr_storage addr_;\n  socklen_t addrlen_;\n};\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/utility.cc",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#include <tensorpipe/transport/uv/utility.h>\n\n#include <tensorpipe/common/error_macros.h>\n#include <tensorpipe/transport/uv/error.h>\n#include <tensorpipe/transport/uv/sockaddr.h>\n#include <tensorpipe/transport/uv/uv.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nstd::tuple<Error, std::string> lookupAddrForIface(std::string iface) {\n  int rv;\n  InterfaceAddresses addresses;\n  int count;\n  std::tie(rv, addresses, count) = getInterfaceAddresses();\n  if (rv < 0) {\n    return std::make_tuple(TP_CREATE_ERROR(UVError, rv), std::string());\n  }\n\n  for (auto i = 0; i < count; i++) {\n    const uv_interface_address_t& interface = addresses[i];\n    if (iface != interface.name) {\n      continue;\n    }\n\n    const auto& address = interface.address;\n    const struct sockaddr* sockaddr =\n        reinterpret_cast<const struct sockaddr*>(&address);\n    switch (sockaddr->sa_family) {\n      case AF_INET:\n        return std::make_tuple(\n            Error::kSuccess,\n            Sockaddr(sockaddr, sizeof(address.address4)).str());\n      case AF_INET6:\n        return std::make_tuple(\n            Error::kSuccess,\n            Sockaddr(sockaddr, sizeof(address.address6)).str());\n    }\n  }\n\n  return std::make_tuple(TP_CREATE_ERROR(NoAddrFoundError), std::string());\n}\n\nstd::tuple<Error, std::string> lookupAddrForHostname() {\n  // For some operations we need a libuv event loop. We create a fresh one, just\n  // for this purpose, which we'll drive inline from this thread. 
This way we\n  // avoid misusing the main event loop in the context impl.\n  struct InlineLoop {\n    uv_loop_t loop;\n\n    InlineLoop() {\n      auto rv = uv_loop_init(&loop);\n      TP_THROW_UV_IF(rv < 0, rv);\n    }\n\n    ~InlineLoop() {\n      auto rv = uv_loop_close(&loop);\n      TP_THROW_UV_IF(rv < 0, rv);\n    }\n  };\n  InlineLoop loop;\n\n  struct InlineDeferredExecutor : public DeferredExecutor {\n    std::thread::id threadId = std::this_thread::get_id();\n\n    void deferToLoop(TTask fn) override {\n      TP_THROW_ASSERT()\n          << \"How could this be called?! This class is supposed to be \"\n          << \"instantiated as const, and this method isn't const-qualified\";\n    }\n\n    bool inLoop() const override {\n      return std::this_thread::get_id() == threadId;\n    }\n  };\n  const InlineDeferredExecutor executor;\n\n  int rv;\n  std::string hostname;\n  std::tie(rv, hostname) = getHostname();\n  if (rv < 0) {\n    return std::make_tuple(TP_CREATE_ERROR(UVError, rv), std::string());\n  }\n\n  Addrinfo info;\n  std::tie(rv, info) = getAddrinfoFromLoop(&loop.loop, std::move(hostname));\n  if (rv < 0) {\n    return std::make_tuple(TP_CREATE_ERROR(UVError, rv), std::string());\n  }\n\n  Error error;\n  for (struct addrinfo* rp = info.get(); rp != nullptr; rp = rp->ai_next) {\n    TP_DCHECK(rp->ai_family == AF_INET || rp->ai_family == AF_INET6);\n    TP_DCHECK_EQ(rp->ai_socktype, SOCK_STREAM);\n    TP_DCHECK_EQ(rp->ai_protocol, IPPROTO_TCP);\n\n    Sockaddr addr = Sockaddr(rp->ai_addr, rp->ai_addrlen);\n\n    TCPHandle handle(&loop.loop, executor);\n    handle.initFromLoop();\n    rv = handle.bindFromLoop(addr);\n    handle.closeFromLoop();\n\n    // The handle will only be closed at the next loop iteration, so run it.\n    {\n      auto rv = uv_run(&loop.loop, UV_RUN_DEFAULT);\n      TP_THROW_ASSERT_IF(rv > 0);\n    }\n\n    if (rv < 0) {\n      // Record the first binding error we encounter and return that in the end\n      // if no working 
address is found, in order to help with debugging.\n      if (!error) {\n        error = TP_CREATE_ERROR(UVError, rv);\n      }\n      continue;\n    }\n\n    return std::make_tuple(Error::kSuccess, addr.str());\n  }\n\n  if (error) {\n    return std::make_tuple(std::move(error), std::string());\n  } else {\n    return std::make_tuple(TP_CREATE_ERROR(NoAddrFoundError), std::string());\n  }\n}\n\nstd::tuple<Error, std::string> lookupAddrLikeNccl(\n    optional<sa_family_t> familyFilter) {\n  int rv;\n  InterfaceAddresses addresses;\n  int count;\n  std::tie(rv, addresses, count) = getInterfaceAddresses();\n  if (rv < 0) {\n    return std::make_tuple(TP_CREATE_ERROR(UVError, rv), std::string());\n  }\n\n  // Libuv already only returns the interfaces that are up and running, whose\n  // address is not null, and whose family is IPv4 or IPv6.\n\n  // NCCL prioritizes the interfaces whose name starts with \"ib\" (for IP over\n  // InfiniBand?), and deprioritizes those that start with \"docker\" or \"lo\".\n  optional<std::string> withIbPrefix;\n  optional<std::string> withoutPrefix;\n  optional<std::string> withDockerPrefix;\n  optional<std::string> withLoPrefix;\n\n  for (auto i = 0; i < count; i++) {\n    const uv_interface_address_t& interface = addresses[i];\n    const struct sockaddr* sockaddr =\n        reinterpret_cast<const struct sockaddr*>(&interface.address);\n\n    // NCCL also seems to ignore any IPv6 loopback address.\n    if (sockaddr->sa_family == AF_INET6 && interface.is_internal) {\n      continue;\n    }\n\n    if (familyFilter.has_value() &&\n        sockaddr->sa_family != familyFilter.value()) {\n      continue;\n    }\n\n    std::string addr;\n    switch (sockaddr->sa_family) {\n      case AF_INET:\n        addr = Sockaddr(sockaddr, sizeof(struct sockaddr_in)).str();\n        break;\n      case AF_INET6:\n        addr = Sockaddr(sockaddr, sizeof(struct sockaddr_in6)).str();\n        break;\n    }\n\n    std::string name = interface.name;\n    if 
(name.find(\"ib\") == 0) {\n      if (!withIbPrefix.has_value()) {\n        withIbPrefix = std::move(addr);\n      }\n    } else if (name.find(\"docker\") == 0) {\n      if (!withDockerPrefix.has_value()) {\n        withDockerPrefix = std::move(addr);\n      }\n    } else if (name.find(\"lo\") == 0) {\n      if (!withLoPrefix.has_value()) {\n        withLoPrefix = std::move(addr);\n      }\n    } else {\n      if (!withoutPrefix.has_value()) {\n        withoutPrefix = std::move(addr);\n      }\n    }\n  }\n\n  if (withIbPrefix.has_value()) {\n    return std::make_tuple(Error::kSuccess, std::move(withIbPrefix).value());\n  } else if (withoutPrefix.has_value()) {\n    return std::make_tuple(Error::kSuccess, std::move(withoutPrefix).value());\n  } else if (withDockerPrefix.has_value()) {\n    return std::make_tuple(\n        Error::kSuccess, std::move(withDockerPrefix).value());\n  } else if (withLoPrefix.has_value()) {\n    return std::make_tuple(Error::kSuccess, std::move(withLoPrefix).value());\n  }\n\n  return std::make_tuple(TP_CREATE_ERROR(NoAddrFoundError), std::string());\n}\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/utility.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <string>\n#include <tuple>\n\n#include <sys/socket.h>\n\n#include <tensorpipe/common/error.h>\n#include <tensorpipe/common/optional.h>\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\nstd::tuple<Error, std::string> lookupAddrForIface(std::string iface);\n\nstd::tuple<Error, std::string> lookupAddrForHostname();\n\n// Try to replicate the same logic used by NCCL to find a node's own address.\n// Roughly, it returns the \"first\" usable address it can find, and prioritizes\n// the interfaces with an `ib` prefix and de-prioritizes those with a `docker`\n// or `lo` prefix. It can optionally return only IPv4 or IPv6 addresses.\nstd::tuple<Error, std::string> lookupAddrLikeNccl(\n    optional<sa_family_t> familyFilter = nullopt);\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "tensorpipe/transport/uv/uv.h",
    "content": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed under the BSD-style license found in the\n * LICENSE file in the root directory of this source tree.\n */\n\n#pragma once\n\n#include <array>\n#include <memory>\n\n#include <uv.h>\n\n#include <tensorpipe/common/deferred_executor.h>\n#include <tensorpipe/common/defs.h>\n#include <tensorpipe/transport/uv/sockaddr.h>\n\n#define TP_THROW_UV(err) TP_THROW(std::runtime_error)\n#define TP_THROW_UV_IF(cond, err) \\\n  if (unlikely(cond))             \\\n  TP_THROW_UV(err) << TP_STRINGIFY(cond) << \": \" << uv_strerror(err)\n\nnamespace tensorpipe {\nnamespace transport {\nnamespace uv {\n\ntemplate <typename T, typename U>\nclass BaseHandle {\n  static void uvCloseCb(uv_handle_t* handle) {\n    T& ref = *reinterpret_cast<T*>(handle->data);\n    if (ref.closeCallback_ != nullptr) {\n      ref.closeCallback_();\n    }\n  }\n\n public:\n  using TCloseCallback = std::function<void()>;\n\n  explicit BaseHandle(uv_loop_t* loop, const DeferredExecutor& executor)\n      : loop_(loop), executor_(executor) {\n    handle_.data = this;\n  }\n\n  // Libuv's handles cannot be copied or moved.\n  BaseHandle(const BaseHandle&) = delete;\n  BaseHandle(BaseHandle&&) = delete;\n  BaseHandle& operator=(const BaseHandle&) = delete;\n  BaseHandle& operator=(BaseHandle&&) = delete;\n\n  virtual ~BaseHandle() = default;\n\n  U* ptr() {\n    return &handle_;\n  }\n\n  void armCloseCallbackFromLoop(TCloseCallback fn) {\n    TP_DCHECK(this->executor_.inLoop());\n    TP_THROW_ASSERT_IF(closeCallback_ != nullptr);\n    closeCallback_ = std::move(fn);\n  }\n\n  void closeFromLoop() {\n    TP_DCHECK(!uv_is_closing(reinterpret_cast<uv_handle_t*>(ptr())));\n    uv_close(reinterpret_cast<uv_handle_t*>(ptr()), uvCloseCb);\n  }\n\n protected:\n  // Underlying libuv handle.\n  U handle_;\n\n  // Underlying libuv event loop.\n  uv_loop_t* const loop_;\n\n  // This 
DeferredExecutor is only used to check that all calls are performed\n  // from the right thread.\n  const DeferredExecutor& executor_;\n\n  TCloseCallback closeCallback_;\n};\n\ntemplate <typename T, typename U>\nclass BaseRequest {\n public:\n  BaseRequest() {\n    request_.data = this;\n  }\n\n  // Libuv's requests cannot be copied or moved.\n  BaseRequest(const BaseRequest&) = delete;\n  BaseRequest(BaseRequest&&) = delete;\n  BaseRequest& operator=(const BaseRequest&) = delete;\n  BaseRequest& operator=(BaseRequest&&) = delete;\n\n  U* ptr() {\n    return &request_;\n  }\n\n private:\n  // Underlying libuv request.\n  U request_;\n};\n\nclass WriteRequest final : public BaseRequest<WriteRequest, uv_write_t> {\n  static void uvWriteCb(uv_write_t* req, int status) {\n    std::unique_ptr<WriteRequest> request(\n        reinterpret_cast<WriteRequest*>(req->data));\n    request->writeCallback_(status);\n  }\n\n public:\n  using TWriteCallback = std::function<void(int status)>;\n\n  explicit WriteRequest(TWriteCallback fn) : writeCallback_(std::move(fn)) {}\n\n  static int perform(\n      uv_stream_t* handle,\n      const uv_buf_t bufs[],\n      unsigned int nbufs,\n      TWriteCallback fn) {\n    auto request = std::make_unique<WriteRequest>(std::move(fn));\n    auto rv = uv_write(request->ptr(), handle, bufs, nbufs, uvWriteCb);\n    request.release();\n    return rv;\n  }\n\n private:\n  TWriteCallback writeCallback_;\n};\n\ntemplate <typename T, typename U>\nclass StreamHandle : public BaseHandle<T, U> {\n  static void uvConnectionCb(uv_stream_t* server, int status) {\n    T& ref = *reinterpret_cast<T*>(server->data);\n    TP_DCHECK(ref.connectionCallback_ != nullptr);\n    ref.connectionCallback_(status);\n  }\n\n  static void uvAllocCb(\n      uv_handle_t* handle,\n      size_t /* unused */,\n      uv_buf_t* buf) {\n    T& ref = *reinterpret_cast<T*>(handle->data);\n    TP_DCHECK(ref.allocCallback_ != nullptr);\n    ref.allocCallback_(buf);\n  }\n\n  static void 
uvReadCb(\n      uv_stream_t* server,\n      ssize_t nread,\n      const uv_buf_t* buf) {\n    T& ref = *reinterpret_cast<T*>(server->data);\n    TP_DCHECK(ref.readCallback_ != nullptr);\n    ref.readCallback_(nread, buf);\n  }\n\n  static constexpr int kBacklog = 128;\n\n public:\n  using TConnectionCallback = std::function<void(int status)>;\n  using TAcceptCallback = std::function<void(int status)>;\n  using TAllocCallback = std::function<void(uv_buf_t* buf)>;\n  using TReadCallback = std::function<void(ssize_t nread, const uv_buf_t* buf)>;\n\n  using BaseHandle<T, U>::BaseHandle;\n\n  // TODO Split this into a armConnectionCallback, a listenStart and a\n  // listenStop method, to propagate the backpressure to the clients.\n  void listenFromLoop(TConnectionCallback connectionCallback) {\n    TP_DCHECK(this->executor_.inLoop());\n    TP_THROW_ASSERT_IF(connectionCallback_ != nullptr);\n    connectionCallback_ = std::move(connectionCallback);\n    auto rv = uv_listen(\n        reinterpret_cast<uv_stream_t*>(this->ptr()), kBacklog, uvConnectionCb);\n    TP_THROW_UV_IF(rv < 0, rv);\n  }\n\n  template <typename V>\n  void acceptFromLoop(V& other) {\n    TP_DCHECK(this->executor_.inLoop());\n    auto rv = uv_accept(\n        reinterpret_cast<uv_stream_t*>(this->ptr()),\n        reinterpret_cast<uv_stream_t*>(other.ptr()));\n    TP_THROW_UV_IF(rv < 0, rv);\n  }\n\n  void armAllocCallbackFromLoop(TAllocCallback fn) {\n    TP_DCHECK(this->executor_.inLoop());\n    TP_THROW_ASSERT_IF(allocCallback_ != nullptr);\n    allocCallback_ = std::move(fn);\n  }\n\n  void armReadCallbackFromLoop(TReadCallback fn) {\n    TP_DCHECK(this->executor_.inLoop());\n    TP_THROW_ASSERT_IF(readCallback_ != nullptr);\n    readCallback_ = std::move(fn);\n  }\n\n  void readStartFromLoop() {\n    TP_DCHECK(this->executor_.inLoop());\n    TP_THROW_ASSERT_IF(allocCallback_ == nullptr);\n    TP_THROW_ASSERT_IF(readCallback_ == nullptr);\n    auto rv = uv_read_start(\n        
reinterpret_cast<uv_stream_t*>(this->ptr()), uvAllocCb, uvReadCb);\n    TP_THROW_UV_IF(rv < 0, rv);\n  }\n\n  void readStopFromLoop() {\n    TP_DCHECK(this->executor_.inLoop());\n    auto rv = uv_read_stop(reinterpret_cast<uv_stream_t*>(this->ptr()));\n    TP_THROW_UV_IF(rv < 0, rv);\n  }\n\n  void writeFromLoop(\n      const uv_buf_t bufs[],\n      unsigned int nbufs,\n      WriteRequest::TWriteCallback fn) {\n    TP_DCHECK(this->executor_.inLoop());\n    auto rv = WriteRequest::perform(\n        reinterpret_cast<uv_stream_t*>(this->ptr()),\n        bufs,\n        nbufs,\n        std::move(fn));\n    TP_THROW_UV_IF(rv < 0, rv);\n  }\n\n protected:\n  TConnectionCallback connectionCallback_;\n  TAllocCallback allocCallback_;\n  TReadCallback readCallback_;\n};\n\nclass ConnectRequest final : public BaseRequest<ConnectRequest, uv_connect_t> {\n  static void uvConnectCb(uv_connect_t* req, int status) {\n    std::unique_ptr<ConnectRequest> request(\n        reinterpret_cast<ConnectRequest*>(req->data));\n    request->connectCallback_(status);\n  }\n\n public:\n  using TConnectCallback = std::function<void(int status)>;\n\n  explicit ConnectRequest(TConnectCallback fn)\n      : connectCallback_(std::move(fn)) {}\n\n  static int perform(\n      uv_tcp_t* handle,\n      const struct sockaddr* addr,\n      TConnectCallback fn) {\n    auto request = std::make_unique<ConnectRequest>(std::move(fn));\n    auto rv = uv_tcp_connect(request->ptr(), handle, addr, uvConnectCb);\n    request.release();\n    return rv;\n  }\n\n private:\n  TConnectCallback connectCallback_;\n};\n\nclass TCPHandle : public StreamHandle<TCPHandle, uv_tcp_t> {\n public:\n  using StreamHandle<TCPHandle, uv_tcp_t>::StreamHandle;\n\n  void initFromLoop() {\n    TP_DCHECK(this->executor_.inLoop());\n    int rv;\n    rv = uv_tcp_init(loop_, this->ptr());\n    TP_THROW_UV_IF(rv < 0, rv);\n    rv = uv_tcp_nodelay(this->ptr(), 1);\n    TP_THROW_UV_IF(rv < 0, rv);\n  }\n\n  [[nodiscard]] int bindFromLoop(const 
Sockaddr& addr) {\n    TP_DCHECK(this->executor_.inLoop());\n    auto rv = uv_tcp_bind(ptr(), addr.addr(), 0);\n    // We don't throw in case of errors here because sometimes we bind in order\n    // to try if an address works and want to handle errors gracefully.\n    return rv;\n  }\n\n  Sockaddr sockNameFromLoop() {\n    TP_DCHECK(this->executor_.inLoop());\n    struct sockaddr_storage ss;\n    struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&ss);\n    int addrlen = sizeof(ss);\n    auto rv = uv_tcp_getsockname(ptr(), addr, &addrlen);\n    TP_THROW_UV_IF(rv < 0, rv);\n    return Sockaddr(addr, addrlen);\n  }\n\n  void connectFromLoop(\n      const Sockaddr& addr,\n      ConnectRequest::TConnectCallback fn) {\n    TP_DCHECK(this->executor_.inLoop());\n    auto rv = ConnectRequest::perform(ptr(), addr.addr(), std::move(fn));\n    TP_THROW_UV_IF(rv < 0, rv);\n  }\n};\n\nstruct AddrinfoDeleter {\n  void operator()(struct addrinfo* ptr) const {\n    uv_freeaddrinfo(ptr);\n  }\n};\n\nusing Addrinfo = std::unique_ptr<struct addrinfo, AddrinfoDeleter>;\n\ninline std::tuple<int, Addrinfo> getAddrinfoFromLoop(\n    uv_loop_t* loop,\n    std::string hostname) {\n  struct addrinfo hints;\n  std::memset(&hints, 0, sizeof(hints));\n  hints.ai_family = AF_UNSPEC;\n  hints.ai_socktype = SOCK_STREAM;\n  hints.ai_protocol = IPPROTO_TCP;\n\n  uv_getaddrinfo_t request;\n  // Don't use a callback, and thus perform the call synchronously, because the\n  // asynchronous version uses a thread pool, and it's not worth spawning new\n  // threads for a functionality which is used so sparingly.\n  auto rv = uv_getaddrinfo(\n      loop,\n      &request,\n      /*getaddrinfo_cb=*/nullptr,\n      hostname.c_str(),\n      /*service=*/nullptr,\n      &hints);\n  if (rv != 0) {\n    return std::make_tuple(rv, Addrinfo());\n  }\n\n  return std::make_tuple(0, Addrinfo(request.addrinfo, AddrinfoDeleter()));\n}\n\nstruct InterfaceAddressesDeleter {\n  explicit 
InterfaceAddressesDeleter(int count) : count_(count) {}\n\n  InterfaceAddressesDeleter() = default;\n\n  void operator()(uv_interface_address_t* ptr) const {\n    uv_free_interface_addresses(ptr, count_);\n  }\n\n private:\n  int count_{-1};\n};\n\nusing InterfaceAddresses =\n    std::unique_ptr<uv_interface_address_t[], InterfaceAddressesDeleter>;\n\ninline std::tuple<int, InterfaceAddresses, int> getInterfaceAddresses() {\n  uv_interface_address_t* info;\n  int count;\n  auto rv = uv_interface_addresses(&info, &count);\n  if (rv != 0) {\n    return std::make_tuple(rv, InterfaceAddresses(), 0);\n  }\n  return std::make_tuple(\n      0, InterfaceAddresses(info, InterfaceAddressesDeleter(count)), count);\n}\n\ninline std::tuple<int, std::string> getHostname() {\n  std::array<char, UV_MAXHOSTNAMESIZE> hostname;\n  size_t size = hostname.size();\n  auto rv = uv_os_gethostname(hostname.data(), &size);\n  if (rv != 0) {\n    return std::make_tuple(rv, std::string());\n  }\n  return std::make_tuple(\n      0, std::string(hostname.data(), hostname.data() + size));\n}\n\ninline std::string formatUvError(int status) {\n  if (status == 0) {\n    return \"success\";\n  } else {\n    std::ostringstream ss;\n    ss << uv_err_name(status) << \": \" << uv_strerror(status);\n    return ss.str();\n  }\n}\n\n} // namespace uv\n} // namespace transport\n} // namespace tensorpipe\n"
  },
  {
    "path": "third_party/README.md",
    "content": "# third_party\n\nThis directory includes dependencies as [submodules][submodules].\n\n[submodules]: https://git-scm.com/book/en/v2/Git-Tools-Submodules\n\n## Build dependencies\n\n* **libuv** is a multi-platform support library with a focus on asynchronous I/O.\n\n## Test dependencies\n\n* **backward-cpp** is a beautiful stack trace pretty printer for C++.\n* **googletest** is a C++ test framework.\n"
  }
]